excluding sklearn pipelines

aiondemand · Nov 27, 2023 · 88a47d8 · 88a47d8
1 parent 3dcdf0c
commit 88a47d8
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 10 deletions.
diff --git a/src/connectors/abstract/resource_connector_by_id.py b/src/connectors/abstract/resource_connector_by_id.py
@@ -32,7 +32,6 @@ def run(
                 "Limiting the results! Please remove the limit command line argument "
                 "in production."
             )
-
         first_run = not state
         if first_run and from_identifier is None:
             raise ValueError("In the first run, the from-identifier needs to be set")

diff --git a/src/connectors/openml/openml_mlmodel_connector.py b/src/connectors/openml/openml_mlmodel_connector.py
@@ -15,6 +15,9 @@
 from database.model.concept.aiod_entry import AIoDEntryCreate
 from database.model.models_and_experiments.ml_model import MLModel
 
+# from database.model.ai_resource.resource import AbstractAIResource
+# from database.model.agent.agent import Agent
+# from database.model.agent.contact import Contact
 from database.model.models_and_experiments.runnable_distribution import RunnableDistribution
 from database.model.platform.platform_names import PlatformName
 from database.model.resource_read_and_create import resource_create
@@ -85,6 +88,18 @@ def fetch_record(self, identifier: int) -> SQLModel | RecordError:
                 )
             ]
 
+        # creator_mlmodel = None
+        # if "creator" and "contributor" in mlmodel_json:
+        #     creator_names = (
+        #         [mlmodel_json["creator"]] + mlmodel_json["contributor"]
+        #         if "contributor" in mlmodel_json
+        #         else [mlmodel_json["creator"]]
+        #     )
+        #     creator_names_ = []
+        #     for c in creator_names:
+        #         creator_names_.append(Contact(name=c))
+
+        #     creator_mlmodel = [Agent(name="creators of the mlmodel", creator=creator_names_)]
         return pydantic_class(
             aiod_entry=AIoDEntryCreate(
                 status="published",
@@ -124,14 +139,19 @@ def fetch(self, offset: int, from_identifier: int) -> Iterator[SQLModel | Record
 
         for summary in mlmodel_summaries:
             identifier = None
-            try:
-                identifier = summary["id"]
-                if identifier < from_identifier:
-                    yield RecordError(identifier=identifier, error="Id too low", ignore=True)
-                if from_identifier is None or identifier >= from_identifier:
-                    yield self.fetch_record(identifier)
-            except Exception as e:
-                yield RecordError(identifier=identifier, error=e)
+            # ToDo: discuss how to accommodate pipelines. Excluding pipelines for now.
+            # Note: weka doesn't have a standard method to define a pipeline.
+            # There are no mlr pipelines in OpenML.
+            if "sklearn.pipeline" not in summary["name"]:
+                try:
+                    identifier = summary["id"]
+
+                    if identifier < from_identifier:
+                        yield RecordError(identifier=identifier, error="Id too low", ignore=True)
+                    if from_identifier is None or identifier >= from_identifier:
+                        yield self.fetch_record(identifier)
+                except Exception as e:
+                    yield RecordError(identifier=identifier, error=e)
 
 
 def _as_int(v: str) -> int:

diff --git a/src/tests/connectors/openml/test_openml_mlmodel_connector.py b/src/tests/connectors/openml/test_openml_mlmodel_connector.py
@@ -15,7 +15,6 @@ def test_first_run():
             mock_list_data(mocked_requests, offset)
         for i in range(1, 4):
             mock_get_data(mocked_requests, str(i))
-
         mlmodels = list(connector.run(state={}, from_identifier=0, limit=None))
 
     assert {m.name for m in mlmodels} == {