Skip to content

Commit

Permalink
[HWORKS-936][APPEND] explicit provenance - model (#235)
Browse files Browse the repository at this point in the history
`get_first` is applied to early for model parents. Provenance methods should return the whole provenance information and only the wrappers should apply `get_first` on provenance.
  • Loading branch information
o-alex authored May 13, 2024
1 parent 0f2490a commit b42f28d
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 137 deletions.
49 changes: 44 additions & 5 deletions python/hsml/core/explicit_provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@
#

import json
import logging
from enum import Enum
from typing import Set

import humps


_logger = logging.getLogger(__name__)


class Artifact:
class MetaType(Enum):
DELETED = 1
Expand Down Expand Up @@ -104,11 +108,23 @@ def from_response_json(json_dict: dict):


class Links:
def __init__(self):
self._accessible = []
self._deleted = []
self._inaccessible = []
self._faulty = []
def __init__(self, accessible=None, deleted=None, inaccessible=None, faulty=None):
if accessible is None:
self._accessible = []
else:
self._accessible = accessible
if deleted is None:
self._deleted = []
else:
self._deleted = deleted
if inaccessible is None:
self._inaccessible = []
else:
self._inaccessible = inaccessible
if faulty is None:
self._faulty = []
else:
self._faulty = faulty

@property
def deleted(self):
Expand Down Expand Up @@ -163,6 +179,29 @@ def __repr__(self):
f", {self._inaccessible!r}, {self._faulty!r})"
)

@staticmethod
def get_one_accessible_parent(links):
if links is None:
_logger.info("There is no parent information")
return
elif links.inaccessible or links.deleted:
_logger.info(
"The parent is deleted or inaccessible. For more details get the full provenance from `_provenance` method"
)
return None
elif links.accessible:
if len(links.accessible) > 1:
msg = "Backend inconsistency - provenance returned more than one parent"
raise Exception(msg)
parent = links.accessible[0]
if isinstance(parent, Artifact):
msg = "The returned object is not a valid object. For more details get the full provenance from `_provenance` method"
raise Exception(msg)
return parent
else:
_logger.info("There is no parent information")
return None

@staticmethod
def __parse_feature_views(links_json: dict, artifacts: Set[str]):
from hsfs import feature_view
Expand Down
34 changes: 2 additions & 32 deletions python/hsml/core/model_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,34 +232,6 @@ def get_tags(self, model_instance, name: str = None):
)
}

def get_first(self, links):
if links.accessible:
if len(links.accessible) > 1:
raise ValueError(
"provenance returned more than one parent - this is backend inconsistency"
)
return links.accessible[0]
elif links.deleted:
if len(links.deleted) > 1:
raise ValueError(
"provenance returned more than one parent - this is backend inconsistency"
)
return links.deleted[0]
elif links.inaccessible:
if len(links.inaccessible) > 1:
raise ValueError(
"provenance returned more than one parent - this is backend inconsistency"
)
return links.inaccessible[0]
elif links.faulty:
if len(links.faulty) > 1:
raise ValueError(
"provenance returned more than one parent - this is backend inconsistency"
)
return links.faulty[0]
else:
return None

def get_feature_view_provenance(self, model_instance):
"""Get the parent feature view of this model, based on explicit provenance.
These feature views can be accessible, deleted or inaccessible.
Expand Down Expand Up @@ -288,12 +260,11 @@ def get_feature_view_provenance(self, model_instance):
"downstreamLvls": 0,
}
links_json = _client._send_request("GET", path_params, query_params)
links = explicit_provenance.Links.from_response_json(
return explicit_provenance.Links.from_response_json(
links_json,
explicit_provenance.Links.Direction.UPSTREAM,
explicit_provenance.Links.Type.FEATURE_VIEW,
)
return ModelApi.get_first(self, links)

def get_training_dataset_provenance(self, model_instance):
"""Get the parent training dataset of this model, based on explicit provenance.
Expand Down Expand Up @@ -323,9 +294,8 @@ def get_training_dataset_provenance(self, model_instance):
"downstreamLvls": 0,
}
links_json = _client._send_request("GET", path_params, query_params)
links = explicit_provenance.Links.from_response_json(
return explicit_provenance.Links.from_response_json(
links_json,
explicit_provenance.Links.Direction.UPSTREAM,
explicit_provenance.Links.Type.TRAINING_DATASET,
)
return ModelApi.get_first(self, links)
13 changes: 2 additions & 11 deletions python/hsml/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,17 +290,8 @@ def get_feature_view(self):
# Raises
`Exception` in case the backend fails to retrieve the tags.
"""
fv = self.get_feature_view_provenance()
if not fv:
return None
if isinstance(fv, explicit_provenance.Artifact):
msg = (
"The returned object is not a valid feature view - "
+ fv.meta_type
+ ". Call get_feature_view_provenance for the base object"
)
raise Exception(msg)
return fv
fv_prov = self.get_feature_view_provenance()
return explicit_provenance.Links.get_one_accessible_parent(fv_prov)

def get_feature_view_provenance(self):
"""Get the parent feature view of this model, based on explicit provenance.
Expand Down
78 changes: 78 additions & 0 deletions python/tests/test_explicit_provenance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#
# Copyright 2024 Hopsworks AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import unittest
from unittest import mock

from hsml.core import explicit_provenance


class TestExplicitProvenance(unittest.TestCase):
def test_one_accessible_parent(self):
artifact = {"id": 1}
links = explicit_provenance.Links(accessible=[artifact])
parent = explicit_provenance.Links.get_one_accessible_parent(links)
self.assertEqual(artifact["id"], parent["id"])

def test_one_accessible_parent_none(self):
links = explicit_provenance.Links()
with mock.patch.object(explicit_provenance._logger, "info") as mock_logger:
parent = explicit_provenance.Links.get_one_accessible_parent(links)
mock_logger.assert_called_once_with("There is no parent information")
self.assertIsNone(parent)

def test_one_accessible_parent_inaccessible(self):
artifact = {"id": 1}
links = explicit_provenance.Links(inaccessible=[artifact])
with mock.patch.object(explicit_provenance._logger, "info") as mock_logger:
parent = explicit_provenance.Links.get_one_accessible_parent(links)
mock_logger.assert_called_once_with(
"The parent is deleted or inaccessible. For more details get the full provenance from `_provenance` method"
)
self.assertIsNone(parent)

def test_one_accessible_parent_deleted(self):
artifact = {"id": 1}
links = explicit_provenance.Links(deleted=[artifact])
with mock.patch.object(explicit_provenance._logger, "info") as mock_logger:
parent = explicit_provenance.Links.get_one_accessible_parent(links)
mock_logger.assert_called_once_with(
"The parent is deleted or inaccessible. For more details get the full provenance from `_provenance` method"
)
self.assertIsNone(parent)

def test_one_accessible_parent_too_many(self):
artifact1 = {"id": 1}
artifact2 = {"id": 2}
links = explicit_provenance.Links(accessible=[artifact1, artifact2])
with self.assertRaises(Exception) as context:
explicit_provenance.Links.get_one_accessible_parent(links)
self.assertTrue(
"Backend inconsistency - provenance returned more than one parent"
in context.exception
)

def test_one_accessible_parent_should_not_be_artifact(self):
artifact = explicit_provenance.Artifact(
1, "test", 1, None, explicit_provenance.Artifact.MetaType.NOT_SUPPORTED
)
links = explicit_provenance.Links(accessible=[artifact])
with self.assertRaises(Exception) as context:
explicit_provenance.Links.get_one_accessible_parent(links)
self.assertTrue(
"The returned object is not a valid object. For more details get the full provenance from `_provenance` method"
in context.exception
)
89 changes: 0 additions & 89 deletions python/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
import copy

import humps
import pytest
from hsml import model
from hsml.constants import MODEL
from hsml.core import explicit_provenance


class TestModel:
Expand Down Expand Up @@ -343,94 +341,7 @@ class ClientMock:
mock_client_get_instance.assert_called_once()
mock_util_get_hostname_replaced_url.assert_called_once_with(sub_path=path_arg)

# get feature view

def test_get_feature_view_none(self, mocker, backend_fixtures):
# Arrange
m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0]
mock_get_feature_view_provenance = mocker.patch(
"hsml.model.Model.get_feature_view_provenance", return_value=None
)

# Act
m = model.Model.from_response_json(m_json)
fv = m.get_feature_view()

# Assert
assert fv is None
mock_get_feature_view_provenance.assert_called_once()

def test_get_feature_view(self, mocker, backend_fixtures):
# Arrange
m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0]
mock_fv = mocker.MagicMock()
mock_get_feature_view_provenance = mocker.patch(
"hsml.model.Model.get_feature_view_provenance", return_value=mock_fv
)

# Act
m = model.Model.from_response_json(m_json)
fv = m.get_feature_view()

# Assert
assert fv == mock_fv
mock_get_feature_view_provenance.assert_called_once()

def test_get_feature_view_prov_artifact(self, mocker, backend_fixtures):
# Arrange
m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0]
mock_fv = mocker.MagicMock(spec=explicit_provenance.Artifact)
mock_fv.meta_type = "meta_type"
mocker.patch(
"hsml.model.Model.get_feature_view_provenance", return_value=mock_fv
)

# Act
m = model.Model.from_response_json(m_json)
with pytest.raises(Exception) as e_info:
_ = m.get_feature_view()

# Assert
assert "The returned object is not a valid feature view" in str(e_info.value)

# get feature view provenance

def test_get_feature_view_provenance(self, mocker, backend_fixtures):
# Arrange
m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0]
mock_model_engine_get_feature_view_provenance = mocker.patch(
"hsml.engine.model_engine.ModelEngine.get_feature_view_provenance"
)

# Act
m = model.Model.from_response_json(m_json)
m.get_feature_view_provenance()

# Assert
mock_model_engine_get_feature_view_provenance.assert_called_once_with(
model_instance=m
)

# get training dataset provenance

def test_get_training_dataset_provenance(self, mocker, backend_fixtures):
# Arrange
m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0]
mock_model_engine_get_training_dataset_provenance = mocker.patch(
"hsml.engine.model_engine.ModelEngine.get_training_dataset_provenance"
)

# Act
m = model.Model.from_response_json(m_json)
m.get_training_dataset_provenance()

# Assert
mock_model_engine_get_training_dataset_provenance.assert_called_once_with(
model_instance=m
)

# auxiliary methods

def assert_model(self, mocker, m, m_json, model_framework):
assert isinstance(m, model.Model)
assert m.id == m_json["id"]
Expand Down

0 comments on commit b42f28d

Please sign in to comment.