Skip to content

Commit

Permalink
excluding sklearn pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
Taniya-Das committed Nov 27, 2023
1 parent 3dcdf0c commit 88a47d8
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
1 change: 0 additions & 1 deletion src/connectors/abstract/resource_connector_by_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def run(
"Limiting the results! Please remove the limit command line argument "
"in production."
)

first_run = not state
if first_run and from_identifier is None:
raise ValueError("In the first run, the from-identifier needs to be set")
Expand Down
36 changes: 28 additions & 8 deletions src/connectors/openml/openml_mlmodel_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from database.model.concept.aiod_entry import AIoDEntryCreate
from database.model.models_and_experiments.ml_model import MLModel

# from database.model.ai_resource.resource import AbstractAIResource
# from database.model.agent.agent import Agent
# from database.model.agent.contact import Contact
from database.model.models_and_experiments.runnable_distribution import RunnableDistribution
from database.model.platform.platform_names import PlatformName
from database.model.resource_read_and_create import resource_create
Expand Down Expand Up @@ -85,6 +88,18 @@ def fetch_record(self, identifier: int) -> SQLModel | RecordError:
)
]

# creator_mlmodel = None
# if "creator" and "contributor" in mlmodel_json:
# creator_names = (
# [mlmodel_json["creator"]] + mlmodel_json["contributor"]
# if "contributor" in mlmodel_json
# else [mlmodel_json["creator"]]
# )
# creator_names_ = []
# for c in creator_names:
# creator_names_.append(Contact(name=c))

# creator_mlmodel = [Agent(name="creators of the mlmodel", creator=creator_names_)]
return pydantic_class(
aiod_entry=AIoDEntryCreate(
status="published",
Expand Down Expand Up @@ -124,14 +139,19 @@ def fetch(self, offset: int, from_identifier: int) -> Iterator[SQLModel | Record

for summary in mlmodel_summaries:
identifier = None
try:
identifier = summary["id"]
if identifier < from_identifier:
yield RecordError(identifier=identifier, error="Id too low", ignore=True)
if from_identifier is None or identifier >= from_identifier:
yield self.fetch_record(identifier)
except Exception as e:
yield RecordError(identifier=identifier, error=e)
# ToDo: discuss how to accommodate pipelines. Excluding pipelines for now.
# Note: weka doesn't have a standard method to define a pipeline.
# There are no mlr pipelines in OpenML.
if "sklearn.pipeline" not in summary["name"]:
try:
identifier = summary["id"]

if identifier < from_identifier:
yield RecordError(identifier=identifier, error="Id too low", ignore=True)
if from_identifier is None or identifier >= from_identifier:
yield self.fetch_record(identifier)
except Exception as e:
yield RecordError(identifier=identifier, error=e)


def _as_int(v: str) -> int:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def test_first_run():
mock_list_data(mocked_requests, offset)
for i in range(1, 4):
mock_get_data(mocked_requests, str(i))

mlmodels = list(connector.run(state={}, from_identifier=0, limit=None))

assert {m.name for m in mlmodels} == {
Expand Down

0 comments on commit 88a47d8

Please sign in to comment.