From 83904b7f351c9ea8b9ac7737892b2b21caedb720 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Wed, 18 Dec 2024 17:02:16 -0500 Subject: [PATCH 1/3] fix(env) Fix forms hook env var default config (#12155) --- .../configuration/src/main/resources/application.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 75b4c8e8b002f9..9010d77015f16c 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -561,7 +561,7 @@ springdoc.api-docs.groups.enabled: true forms: hook: - enabled: { $FORMS_HOOK_ENABLED:true } + enabled: ${FORMS_HOOK_ENABLED:true} consumerGroupSuffix: ${FORMS_HOOK_CONSUMER_GROUP_SUFFIX:} businessAttribute: From da8f8221977444644596da40e676e15362bd7a2d Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 18 Dec 2024 14:36:10 -0800 Subject: [PATCH 2/3] feat(ingest/mlflow): Support configurable base_external_url (#12167) --- .../src/datahub/ingestion/source/mlflow.py | 35 ++++++++++++++++--- .../tests/unit/test_mlflow_source.py | 13 +++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py index cef6d2b1bb5774..26d160acf330cf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py @@ -38,16 +38,30 @@ class MLflowConfig(EnvConfigMixin): tracking_uri: Optional[str] = Field( default=None, - description="Tracking server URI. If not set, an MLflow default tracking_uri is used (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)", + description=( + "Tracking server URI. If not set, an MLflow default tracking_uri is used" + " (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)" + ), ) registry_uri: Optional[str] = Field( default=None, - description="Registry server URI. If not set, an MLflow default registry_uri is used (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)", + description=( + "Registry server URI. If not set, an MLflow default registry_uri is used" + " (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)" + ), ) model_name_separator: str = Field( default="_", description="A string which separates model name from its version (e.g. model_1 or model-1)", ) + base_external_url: Optional[str] = Field( + default=None, + description=( + "Base URL to use when constructing external URLs to MLflow." + " If not set, tracking_uri is used if it's an HTTP URL." + " If neither is set, external URLs are not generated." + ), + ) @dataclass @@ -279,12 +293,23 @@ def _make_ml_model_urn(self, model_version: ModelVersion) -> str: ) return urn - def _make_external_url(self, model_version: ModelVersion) -> Union[None, str]: + def _get_base_external_url_from_tracking_uri(self) -> Optional[str]: + if isinstance( + self.client.tracking_uri, str + ) and self.client.tracking_uri.startswith("http"): + return self.client.tracking_uri + else: + return None + + def _make_external_url(self, model_version: ModelVersion) -> Optional[str]: """ Generate URL for a Model Version to MLflow UI. """ - base_uri = self.client.tracking_uri - if base_uri.startswith("http"): + base_uri = ( + self.config.base_external_url + or self._get_base_external_url_from_tracking_uri() + ) + if base_uri: return f"{base_uri.rstrip('/')}/#/models/{model_version.name}/versions/{model_version.version}" else: return None diff --git a/metadata-ingestion/tests/unit/test_mlflow_source.py b/metadata-ingestion/tests/unit/test_mlflow_source.py index d213dd92352e62..e882296b6f331d 100644 --- a/metadata-ingestion/tests/unit/test_mlflow_source.py +++ b/metadata-ingestion/tests/unit/test_mlflow_source.py @@ -136,3 +136,16 @@ def test_make_external_link_remote(source, model_version): url = source._make_external_url(model_version) assert url == expected_url + + +def test_make_external_link_remote_via_config(source, model_version): + custom_base_url = "https://custom-server.org" + source.config.base_external_url = custom_base_url + source.client = MlflowClient( + tracking_uri="https://dummy-mlflow-tracking-server.org" + ) + expected_url = f"{custom_base_url}/#/models/{model_version.name}/versions/{model_version.version}" + + url = source._make_external_url(model_version) + + assert url == expected_url From 4392d72456faae5f0f59eb09756287182feec56b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 18 Dec 2024 20:29:34 -0500 Subject: [PATCH 3/3] fix(cli/properties): fix data type validation (#12170) --- .../structuredproperties.py | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index e37281dea86e1f..619f69b016262d 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -14,7 +14,7 @@ PropertyValueClass, StructuredPropertyDefinitionClass, ) -from datahub.metadata.urns import StructuredPropertyUrn, Urn +from datahub.metadata.urns import DataTypeUrn, StructuredPropertyUrn, Urn from datahub.utilities.urns._urn_base import URN_TYPES logging.basicConfig(level=logging.INFO) @@ -86,19 +86,31 @@ class StructuredProperties(ConfigModel): @validator("type") def validate_type(cls, v: str) -> str: - # Convert to lowercase if needed - if not v.islower(): + # This logic is somewhat hacky, since we need to deal with + # 1. fully qualified urns + # 2. raw data types, that need to get the datahub namespace prefix + # While keeping the user-facing interface and error messages clean. + + if not v.startswith("urn:li:") and not v.islower(): + # Convert to lowercase if needed + v = v.lower() logger.warning( - f"Structured property type should be lowercase. Updated to {v.lower()}" + f"Structured property type should be lowercase. Updated to {v}" ) - v = v.lower() + + urn = Urn.make_data_type_urn(v) # Check if type is allowed - if not AllowedTypes.check_allowed_type(v): + data_type_urn = DataTypeUrn.from_string(urn) + unqualified_data_type = data_type_urn.id + if unqualified_data_type.startswith("datahub."): + unqualified_data_type = unqualified_data_type[len("datahub.") :] + if not AllowedTypes.check_allowed_type(unqualified_data_type): raise ValueError( - f"Type {v} is not allowed. Allowed types are {AllowedTypes.values()}" + f"Type {unqualified_data_type} is not allowed. Allowed types are {AllowedTypes.values()}" ) - return v + + return urn @property def fqn(self) -> str: