From 0c483e52b6ea41a4df8b83ad5e39e3c1e2dc5539 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Wed, 30 Aug 2023 07:27:33 +0200 Subject: [PATCH 1/4] fix(pm4py): bug fixes OCEL 2.0 --- pm4py/objects/ocel/exporter/sqlite/variants/ocel20.py | 4 +++- pm4py/objects/ocel/importer/sqlite/variants/ocel20.py | 1 - pm4py/objects/ocel/util/ocel_consistency.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pm4py/objects/ocel/exporter/sqlite/variants/ocel20.py b/pm4py/objects/ocel/exporter/sqlite/variants/ocel20.py index 247c8b3de..cbd177702 100644 --- a/pm4py/objects/ocel/exporter/sqlite/variants/ocel20.py +++ b/pm4py/objects/ocel/exporter/sqlite/variants/ocel20.py @@ -81,7 +81,9 @@ def apply(ocel: OCEL, file_path: str, parameters: Optional[Dict[Any, Any]] = Non df = ocel.objects[ocel.objects[object_type] == ot].dropna(how="all", axis="columns") df = df.rename(columns={object_id: "ocel_id"}) del df[object_type] - df["ocel_time"] = datetime.fromtimestamp(0) + # Pandas 2.1.0 changes the way that datetime is written to SQL, and this is causing problems. + df["ocel_time"] = datetime.fromtimestamp(129600) + df["ocel_time"] = df["ocel_time"].astype('datetime64[ns]') df2 = ocel.object_changes[ocel.object_changes[object_type] == ot].dropna(how="all", axis="columns") if len(df2) > 0: diff --git a/pm4py/objects/ocel/importer/sqlite/variants/ocel20.py b/pm4py/objects/ocel/importer/sqlite/variants/ocel20.py index 60b1ec180..81e2b0948 100644 --- a/pm4py/objects/ocel/importer/sqlite/variants/ocel20.py +++ b/pm4py/objects/ocel/importer/sqlite/variants/ocel20.py @@ -136,7 +136,6 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None): object_changes = object_changes.sort_values([event_timestamp, internal_index]) del object_changes[internal_index] - E2O.dropna(inplace=True, subset=[event_id, event_activity, event_timestamp, object_id, object_type]) ocel = OCEL(events=event_types_coll, objects=objects, relations=E2O, object_changes=object_changes, o2o=O2O, parameters=parameters) ocel = ocel_consistency.apply(ocel, parameters=parameters) ocel = filtering_utils.propagate_relations_filtering(ocel, parameters=parameters) diff --git a/pm4py/objects/ocel/util/ocel_consistency.py b/pm4py/objects/ocel/util/ocel_consistency.py index 5037e15e3..6a026dd20 100644 --- a/pm4py/objects/ocel/util/ocel_consistency.py +++ b/pm4py/objects/ocel/util/ocel_consistency.py @@ -36,6 +36,7 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL: for fie in fields[tab]: df.dropna(subset=[fie], how="any", inplace=True) df[fie] = df[fie].astype("string") + df.dropna(subset=[fie], how="any", inplace=True) df = df[df[fie].str.len() > 0] return ocel From bcd6fe12a3456caf374f2be4dbc208d9cce50fae Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Wed, 30 Aug 2023 07:27:44 +0200 Subject: [PATCH 2/4] removing hard requirement on Pandas --- requirements.txt | 2 +- requirements_complete.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a0e2396bf..2feb81698 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ lxml matplotlib networkx numpy -pandas<=2.0.3 +pandas pydotplus pytz scipy diff --git a/requirements_complete.txt b/requirements_complete.txt index 79ff9ed03..9235d4ddb 100644 --- a/requirements_complete.txt +++ b/requirements_complete.txt @@ -11,7 +11,7 @@ matplotlib networkx numpy packaging -pandas<=2.0.3 +pandas pillow pydotplus pyparsing From 9105eb375cb2fee7d731862b3fe5bf1ce88d455c Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Wed, 30 Aug 2023 07:37:54 +0200 Subject: [PATCH 3/4] fix(pm4py): various bug fixes OCEL support --- pm4py/objects/ocel/util/filtering_utils.py | 6 ++++-- pm4py/objects/ocel/util/ocel_consistency.py | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pm4py/objects/ocel/util/filtering_utils.py b/pm4py/objects/ocel/util/filtering_utils.py index dd5e704f3..860977020 100644 --- a/pm4py/objects/ocel/util/filtering_utils.py +++ b/pm4py/objects/ocel/util/filtering_utils.py @@ -115,10 +115,12 @@ def propagate_relations_filtering(ocel: OCEL, parameters: Optional[Dict[Any, Any event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column) object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters, ocel.object_id_column) - selected_event_ids = set(ocel.relations[event_id].unique()) - selected_object_ids = set(ocel.relations[object_id].unique()) + selected_event_ids = set(ocel.relations[event_id].unique()).intersection(set(ocel.events[event_id].unique())) + selected_object_ids = set(ocel.relations[object_id].unique()).intersection(set(ocel.objects[object_id].unique())) ocel.events = ocel.events[ocel.events[event_id].isin(selected_event_ids)] ocel.objects = ocel.objects[ocel.objects[object_id].isin(selected_object_ids)] + ocel.relations = ocel.relations[ocel.relations[event_id].isin(selected_event_ids)] + ocel.relations = ocel.relations[ocel.relations[object_id].isin(selected_object_ids)] ocel.e2e = ocel.e2e[(ocel.e2e[event_id].isin(selected_event_ids)) & (ocel.e2e[event_id+"_2"].isin(selected_event_ids))] ocel.o2o = ocel.o2o[(ocel.o2o[object_id].isin(selected_object_ids)) & (ocel.o2o[object_id+"_2"].isin(selected_object_ids))] diff --git a/pm4py/objects/ocel/util/ocel_consistency.py b/pm4py/objects/ocel/util/ocel_consistency.py index 6a026dd20..6cb001c2e 100644 --- a/pm4py/objects/ocel/util/ocel_consistency.py +++ b/pm4py/objects/ocel/util/ocel_consistency.py @@ -34,9 +34,10 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL: for tab in fields: df = getattr(ocel, tab) for fie in fields[tab]: - df.dropna(subset=[fie], how="any", inplace=True) + df = df.dropna(subset=[fie], how="any") df[fie] = df[fie].astype("string") - df.dropna(subset=[fie], how="any", inplace=True) + df = df.dropna(subset=[fie], how="any") df = df[df[fie].str.len() > 0] + setattr(ocel, tab, df) return ocel From 255ece4879d3e5188ed144da6d51356ba424f72c Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Wed, 30 Aug 2023 07:40:03 +0200 Subject: [PATCH 4/4] updated meta information --- pm4py/meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pm4py/meta.py b/pm4py/meta.py index 041fec9c9..ad75b6853 100644 --- a/pm4py/meta.py +++ b/pm4py/meta.py @@ -1,5 +1,5 @@ __name__ = 'pm4py' -VERSION = '2.7.5.1' +VERSION = '2.7.5.2' __version__ = VERSION __doc__ = 'Process mining for Python' __author__ = 'Fraunhofer Institute for Applied Technology'