Skip to content

Commit

Permalink
added creators field
Browse files Browse the repository at this point in the history
  • Loading branch information
Taniya-Das committed Nov 27, 2023
1 parent 6056fcc commit 5d40691
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 25 deletions.
1 change: 0 additions & 1 deletion src/connectors/abstract/resource_connector_by_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def run(
"Limiting the results! Please remove the limit command line argument "
"in production."
)

first_run = not state
if first_run and from_identifier is None:
raise ValueError("In the first run, the from-identifier needs to be set")
Expand Down
48 changes: 27 additions & 21 deletions src/connectors/openml/openml_mlmodel_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
from database.model.concept.aiod_entry import AIoDEntryCreate
from database.model.models_and_experiments.ml_model import MLModel

# from database.model.ai_resource.resource import AbstractAIResource
# from database.model.agent.agent import Agent
# from database.model.agent.contact import Contact
from database.model.agent.contact import Contact
from database.model.models_and_experiments.runnable_distribution import RunnableDistribution
from database.model.platform.platform_names import PlatformName
from database.model.resource_read_and_create import resource_create
from connectors.resource_with_relations import ResourceWithRelations


class OpenMlMLModelConnector(ResourceConnectorById[MLModel]):
Expand All @@ -38,10 +37,10 @@ def resource_class(self) -> type[MLModel]:
def platform_name(self) -> PlatformName:
return PlatformName.openml

def retry(self, identifier: int) -> SQLModel | RecordError:
def retry(self, identifier: int) -> ResourceWithRelations[SQLModel] | RecordError:
return self.fetch_record(identifier)

def fetch_record(self, identifier: int) -> SQLModel | RecordError:
def fetch_record(self, identifier: int) -> ResourceWithRelations[MLModel] | RecordError:
url_mlmodel = f"https://www.openml.org/api/v1/json/flow/{identifier}"
response = requests.get(url_mlmodel)
if not response.ok:
Expand Down Expand Up @@ -88,19 +87,20 @@ def fetch_record(self, identifier: int) -> SQLModel | RecordError:
)
]

# creator_mlmodel = None
# if "creator" and "contributor" in mlmodel_json:
# creator_names = (
# [mlmodel_json["creator"]] + mlmodel_json["contributor"]
# if "contributor" in mlmodel_json
# else [mlmodel_json["creator"]]
# )
# creator_names_ = []
# for c in creator_names:
# creator_names_.append(Contact(name=c))

# creator_mlmodel = [Agent(name="creators of the mlmodel", creator=creator_names_)]
return pydantic_class(
creator_names = []
if "creator" and "contributor" in mlmodel_json:
creators = (
[mlmodel_json["creator"]] + mlmodel_json["contributor"]
if "contributor" in mlmodel_json
else [mlmodel_json["creator"]]
)
for name in creators:
creator_names.append(Contact(name=name))

# creator_mlmodel =

pydantic_class = resource_create(MLModel)
mlmodel = pydantic_class(
aiod_entry=AIoDEntryCreate(
status="published",
),
Expand All @@ -117,7 +117,13 @@ def fetch_record(self, identifier: int) -> SQLModel | RecordError:
version=mlmodel_json["version"],
)

def fetch(self, offset: int, from_identifier: int) -> Iterator[SQLModel | RecordError]:
return ResourceWithRelations[MLModel](
resource=mlmodel, related_resources={"creator": creator_names}
)

def fetch(
self, offset: int, from_identifier: int
) -> Iterator[ResourceWithRelations[SQLModel] | RecordError]:
url_mlmodel = (
"https://www.openml.org/api/v1/json/flow/list/"
f"limit/{self.limit_per_iteration}/offset/{offset}"
Expand All @@ -139,9 +145,9 @@ def fetch(self, offset: int, from_identifier: int) -> Iterator[SQLModel | Record

for summary in mlmodel_summaries:
identifier = None
# ToDo: dicuss how to accomodate pipelines. Excluding pipelines for now.
# ToDo: discuss how to accommodate pipelines. Excluding sklearn pipelines for now.
# Note: weka doesn't have a standard method to define pipeline.
# There are no mlr pipeline in OpenML.
# There are no mlr pipelines in OpenML.
if "sklearn.pipeline" not in summary["name"]:
try:
identifier = summary["id"]
Expand Down
6 changes: 3 additions & 3 deletions src/tests/connectors/openml/test_openml_mlmodel_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_first_run():
mock_get_data(mocked_requests, str(i))
mlmodels = list(connector.run(state={}, from_identifier=0, limit=None))

assert {m.name for m in mlmodels} == {
assert {m.resource.name for m in mlmodels} == {
"openml.evaluation.EuclideanDistance",
"openml.evaluation.PolynomialKernel",
"openml.evaluation.RBFKernel",
Expand All @@ -35,7 +35,7 @@ def test_second_run():
connector.run(state={"offset": 2, "last_id": 2}, from_identifier=0, limit=None)
)
assert len(mlmodels) == 1
assert {m.name for m in mlmodels} == {"openml.evaluation.RBFKernel"}
assert {m.resource.name for m in mlmodels} == {"openml.evaluation.RBFKernel"}


def test_second_run_wrong_identifier():
Expand All @@ -47,7 +47,7 @@ def test_second_run_wrong_identifier():
connector.run(state={"offset": 2, "last_id": 0}, from_identifier=0, limit=None)
)
assert len(mlmodels) == 1
assert {m.name for m in mlmodels} == {"openml.evaluation.RBFKernel"}
assert {m.resource.name for m in mlmodels} == {"openml.evaluation.RBFKernel"}


def mock_list_data(mocked_requests, offset):
Expand Down

0 comments on commit 5d40691

Please sign in to comment.