diff --git a/CHANGELOG.md b/CHANGELOG.md index ab7b07daa..01cd994ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,17 @@ # Changelog of pm4py -## pm4py 2.7.6 (2023.07.XX) +## pm4py 2.7.6 (2023.08.XX) ### Added * 6760518dea19334a21442200bef647e4c07f3636 * LLM abstraction of the temporal profile model * 13d001c76e3de40786dce75e76e56a13a821173a * set of event logs for fairness assessment (hospital, hiring, lending, renting) +* e3044278b3e7d984c7fdf9e39554cc4551332739 + * added OCEL filters: + * length of a connected components + * presence of at least an object of a given object type ### Changed * 84629e2ea342348e30aa04a7d41ad7b39159b400 @@ -16,7 +20,11 @@ * refactored log_to_interval_tree methods in two methods (log to intervals, and intervals to tree) * added queue-related examples - +* da3a12f615dba3c46793a2d9977dfca11dad85b0 + * avoid annotation start/end edges in DFG with performance metrics +* 37fba9285cfde95309142e4404f9cfbcb2b9296c + * visualizations support nanoseconds granularity when needed + ### Deprecated ### Fixed @@ -34,14 +42,20 @@ * fixed path to Graphviz.JS * ca79aa9b9e51ba3a95665d5d53c8e5ab5028bf12 * minor fix TBR generalization parameters +* 57a30fb452a759bc71f707e67bf0f63118194b7f + * method to sample OCEL connected components is fixed ### Removed +* bf5574a34a31b93024dd9feb54acc5cc475640bd + * change-of-mind on format_dataframe deprecation warning ### Other * 916ea3163119afe7aa0fc9f6c43624147d6c0f9f * reference to published paper in OCEL feature extraction * 549aa7c6766f1a51425a7a65673173c55d9731e9 * updated reference to PM4Py website +* 20ce84db4e195937c77280c950ff12083fc5833b + * example for log granularity change --- diff --git a/docs/source/api.rst b/docs/source/api.rst index 1ba8b6d71..b1b2edf53 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -222,7 +222,8 @@ Also, some filtering techniques are offered on top of object-centric event logs: * :meth:`pm4py.filtering.filter_ocel_events`; filters a specified collection of 
event identifiers from the object-centric event log. * :meth:`pm4py.filtering.filter_ocel_objects`; filters a specified collection of object identifiers from the object-centric event log. * :meth:`pm4py.filtering.filter_ocel_cc_object`; filters a connected component from the object-centric event log to which the object with the provided identifier belongs. - + * :meth:`pm4py.filtering.filter_ocel_cc_length`; filter the connected components from an object-centric event log having a number of objects falling in a provided range. + * :meth:`pm4py.filtering.filter_ocel_cc_otype`; filter the connected components from an object-centric event log having at least an object of the specified object type. Machine Learning (:mod:`pm4py.ml`) ------------------------------------------ @@ -547,6 +548,8 @@ Overall List of Methods pm4py.filtering.filter_ocel_events pm4py.filtering.filter_ocel_objects pm4py.filtering.filter_ocel_cc_object + pm4py.filtering.filter_ocel_cc_length + pm4py.filtering.filter_ocel_cc_otype pm4py.ml pm4py.ml.split_train_test pm4py.ml.get_prefixes_from_log diff --git a/examples/activities_to_alphabet.py b/examples/activities_to_alphabet.py new file mode 100644 index 000000000..561ca614d --- /dev/null +++ b/examples/activities_to_alphabet.py @@ -0,0 +1,13 @@ +import pm4py +from pm4py.objects.log.util import activities_to_alphabet +from pm4py.util import constants + + +def execute_script(): + dataframe = pm4py.read_xes("../tests/input_data/running-example.xes") + renamed_dataframe = activities_to_alphabet.apply(dataframe, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}) + print(renamed_dataframe) + + +if __name__ == "__main__": + execute_script() diff --git a/examples/ocel_filter_cc.py b/examples/ocel_filter_cc.py new file mode 100644 index 000000000..3603e4b6a --- /dev/null +++ b/examples/ocel_filter_cc.py @@ -0,0 +1,19 @@ +import pm4py +import sys + + +def execute_script(): + ocel = 
pm4py.read_ocel("../tests/input_data/ocel/example_log.jsonocel") + print(ocel) + # filters the connected components of the OCEL in which there is at least a delivery, + # obtaining a filtered OCEL back. + ocel_with_del = pm4py.filter_ocel_cc_otype(ocel, "delivery") + print(ocel_with_del) + # filters the connected components of the OCEL with at least five different objects, + # obtaining a filtered OCEL back. + ocel_with_three_objs = pm4py.filter_ocel_cc_length(ocel, 5, sys.maxsize) + print(ocel_with_three_objs) + + +if __name__ == "__main__": + execute_script() diff --git a/examples/timestamp_granularity.py b/examples/timestamp_granularity.py new file mode 100644 index 000000000..0e9c87d61 --- /dev/null +++ b/examples/timestamp_granularity.py @@ -0,0 +1,41 @@ +import pandas as pd +import pm4py +import time + + +def execute_script(): + dataframe = pd.read_csv("../tests/input_data/receipt.csv") + dataframe = pm4py.format_dataframe(dataframe) + + # prints the original timestamp column of the dataframe + print(dataframe["time:timestamp"]) + + # Here are some common options that you can use as a granularity: + # + # 'D': Day + # 'H': Hour + # 'T' or 'min': Minute + # 'S': Second + # 'L' or 'ms': Millisecond + # 'U': Microsecond + # 'N': Nanosecond + + st = time.time_ns() + # cast on the minute + dataframe["time:timestamp"] = dataframe["time:timestamp"].dt.floor('T') + ct = time.time_ns() + + print("required time for the timestamp casting: %.2f seconds" % ((ct-st)/10**9)) + + # prints the new timestamp column of the dataframe + print(dataframe["time:timestamp"]) + + # for completeness, we report some alternatives methods in Pandas to do the same (casting on the minute): + # + # dataframe["time:timestamp"] = dataframe["time:timestamp"].apply(lambda x: x.replace(second=0, microsecond=0)) + # + # dataframe["time:timestamp"] = dataframe["time:timestamp"].dt.round('min') + + +if __name__ == "__main__": + execute_script() diff --git a/pm4py/__init__.py b/pm4py/__init__.py 
index d2084a27b..ce60b90fc 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -30,7 +30,7 @@ filter_ocel_object_types_allowed_activities, filter_ocel_object_per_type_count, filter_ocel_start_events_per_object_type, \ filter_ocel_end_events_per_object_type, filter_ocel_events_timestamp, filter_prefixes, filter_suffixes, \ filter_four_eyes_principle, filter_activity_done_different_resources, filter_ocel_events, filter_ocel_objects, \ - filter_ocel_object_types, filter_ocel_cc_object + filter_ocel_object_types, filter_ocel_cc_object, filter_ocel_cc_length, filter_ocel_cc_otype from pm4py.discovery import discover_petri_net_alpha, discover_petri_net_alpha_plus, discover_petri_net_ilp, discover_petri_net_heuristics, \ discover_petri_net_inductive, discover_process_tree_inductive, discover_heuristics_net, \ discover_dfg, discover_footprints, discover_eventually_follows_graph, discover_directly_follows_graph, discover_bpmn_inductive, \ diff --git a/pm4py/algo/discovery/batches/variants/pandas.py b/pm4py/algo/discovery/batches/variants/pandas.py index daf61b0e9..8b2333281 100644 --- a/pm4py/algo/discovery/batches/variants/pandas.py +++ b/pm4py/algo/discovery/batches/variants/pandas.py @@ -94,9 +94,14 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A attributes_to_consider.add(event_id_key) log = log[list(attributes_to_consider)] - log[timestamp_key] = log[timestamp_key].values.astype(np.int64) // 10**9 + # the timestamp columns are expressed in nanoseconds values + # here, we want them to have the second granularity, so we divide by 10**9 + # for example 1001000000 nanoseconds (value stored in the column) + # is equivalent to 1,001 seconds. + log[timestamp_key] = log[timestamp_key].values.astype(np.int64) / 10**9 if start_timestamp_key != timestamp_key: - log[start_timestamp_key] = log[start_timestamp_key].values.astype(np.int64) // 10**9 + # see the aforementioned explanation. 
+ log[start_timestamp_key] = log[start_timestamp_key].values.astype(np.int64) / 10**9 actres_grouping0 = log.groupby([activity_key, resource_key]).agg(list).to_dict() start_timestamps = actres_grouping0[start_timestamp_key] diff --git a/pm4py/analysis.py b/pm4py/analysis.py index e8b945acc..b8398afef 100644 --- a/pm4py/analysis.py +++ b/pm4py/analysis.py @@ -17,7 +17,7 @@ __doc__ = """ """ -from typing import List, Optional, Tuple, Dict, Union, Generator, Set +from typing import List, Optional, Tuple, Dict, Union, Generator, Set, Any from pm4py.objects.log.obj import Trace, EventLog, EventStream from pm4py.objects.conversion.log import converter as log_converter @@ -153,7 +153,7 @@ def solve_extended_marking_equation(trace: Trace, sync_net: PetriNet, sync_im: M def check_soundness(petri_net: PetriNet, initial_marking: Marking, - final_marking: Marking) -> bool: + final_marking: Marking, print_diagnostics: bool = False) -> Tuple[bool, Dict[str, Any]]: """ Check if a given Petri net is a sound WF-net. A Petri net is a WF-net iff: @@ -165,11 +165,15 @@ def check_soundness(petri_net: PetriNet, initial_marking: Marking, - it contains no deadlocks - we are able to always reach the final marking For a formal definition of sound WF-net, consider: http://www.padsweb.rwth-aachen.de/wvdaalst/publications/p628.pdf + In the returned object, the first element is a boolean indicating if the Petri net is a sound workflow net. + The second element is a set of diagnostics collected while running WOFLAN + (expressed as a dictionary associating the keys [name of the diagnostics] with the corresponding diagnostics). :param petri_net: petri net :param initial_marking: initial marking :param final_marking: final marking - :rtype: ``bool`` + :param print_diagnostics: boolean value that sets up additional prints during the execution of WOFLAN + :rtype: ``Tuple[bool, Dict[str, Any]]`` .. 
code-block:: python3 @@ -179,7 +183,8 @@ def check_soundness(petri_net: PetriNet, initial_marking: Marking, is_sound = pm4py.check_soundness(net, im, fm) """ from pm4py.algo.analysis.woflan import algorithm as woflan - return woflan.apply(petri_net, initial_marking, final_marking, parameters={"return_asap_when_not_sound": True, "return_diagnostics": True}) + return woflan.apply(petri_net, initial_marking, final_marking, + parameters={"return_asap_when_not_sound": True, "return_diagnostics": True, "print_diagnostics": print_diagnostics}) def cluster_log(log: Union[EventLog, EventStream, pd.DataFrame], sklearn_clusterer=None, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> Generator[EventLog, None, None]: diff --git a/pm4py/filtering.py b/pm4py/filtering.py index 1498b7077..65e6e4684 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -1114,3 +1114,81 @@ def filter_ocel_cc_object(ocel: OCEL, object_id: str) -> OCEL: for cc in ocel_splits: if object_id in cc.objects[ocel.object_id_column].unique(): return cc + + +def filter_ocel_cc_length(ocel: OCEL, min_cc_length: int, max_cc_length: int) -> OCEL: + """ + Keeps only the objects in an OCEL belonging to a connected component with a length + falling in a specified range + + Paper: + Adams, Jan Niklas, et al. "Defining cases and variants for object-centric event data." 2022 4th International Conference on Process Mining (ICPM). IEEE, 2022. + + :param ocel: object-centric event log + :param min_cc_length: minimum allowed length for the connected component + :param max_cc_length: maximum allowed length for the connected component + :rtype: ``OCEL`` + + .. 
code-block:: python3 + + import pm4py + + ocel = pm4py.read_ocel('log.jsonocel') + filtered_ocel = pm4py.filter_ocel_cc_length(ocel, 2, 10) + """ + from pm4py.algo.transformation.ocel.graphs import object_interaction_graph + import networkx as nx + + g0 = object_interaction_graph.apply(ocel) + g = nx.Graph() + + for edge in g0: + g.add_edge(edge[0], edge[1]) + + conn_comp = list(nx.connected_components(g)) + conn_comp = [x for x in conn_comp if min_cc_length <= len(x) <= max_cc_length] + objs = [y for x in conn_comp for y in x] + + return filter_ocel_objects(ocel, objs) + + +def filter_ocel_cc_otype(ocel: OCEL, otype: str, positive: bool = True) -> OCEL: + """ + Filters the objects belonging to the connected components having at least an object + of the provided object type. + + Paper: + Adams, Jan Niklas, et al. "Defining cases and variants for object-centric event data." 2022 4th International Conference on Process Mining (ICPM). IEEE, 2022. + + :param ocel: object-centric event log + :param otype: object type + :param positive: boolean that keeps or discards the objects of these components + :rtype: ``OCEL`` + + .. 
code-block:: python3 + + import pm4py + + ocel = pm4py.read_ocel('log.jsonocel') + filtered_ocel = pm4py.filter_ocel_cc_otype(ocel, 'order') + """ + if positive: + objs = set(ocel.objects[ocel.objects[ocel.object_type_column] == otype][ocel.object_id_column]) + else: + objs = set(ocel.objects[~(ocel.objects[ocel.object_type_column] == otype)][ocel.object_id_column]) + + from pm4py.algo.transformation.ocel.graphs import object_interaction_graph + import networkx as nx + + g0 = object_interaction_graph.apply(ocel) + g = nx.Graph() + + for edge in g0: + g.add_edge(edge[0], edge[1]) + + conn_comp = list(nx.connected_components(g)) + conn_comp = [x for x in conn_comp if len(set(x).intersection(objs)) > 0] + + objs = [y for x in conn_comp for y in x] + + return filter_ocel_objects(ocel, objs) diff --git a/pm4py/objects/log/util/activities_to_alphabet.py b/pm4py/objects/log/util/activities_to_alphabet.py new file mode 100644 index 000000000..4129428a2 --- /dev/null +++ b/pm4py/objects/log/util/activities_to_alphabet.py @@ -0,0 +1,78 @@ +''' + This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de). + + PM4Py is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + PM4Py is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PM4Py. If not, see . 
+''' + +from enum import Enum +from pm4py.util import exec_utils, xes_constants +import pandas as pd +from typing import Optional, Dict, Any, Union, Tuple + + +class Parameters(Enum): + ACTIVITY_KEY = "activity_key" + RETURN_MAPPING = "return_mapping" + + +def apply(dataframe: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None) -> Union[ + pd.DataFrame, Tuple[pd.DataFrame, Dict[str, str]]]: + """ + Remap the activities in a dataframe using an augmented alphabet to minimize the size of the encoding + + Running example: + + import pm4py + from pm4py.objects.log.util import activities_to_alphabet + from pm4py.util import constants + + dataframe = pm4py.read_xes("tests/input_data/running-example.xes") + renamed_dataframe = activities_to_alphabet.apply(dataframe, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}) + print(renamed_dataframe) + + Parameters + -------------- + dataframe + Pandas dataframe + parameters + Parameters of the method, including: + - Parameters.ACTIVITY_KEY => attribute to be used as activity + - Parameters.RETURN_MAPPING => (boolean) enables the returning the mapping dictionary (so the original activities can be re-constructed) + + Returns + -------------- + ren_dataframe + Pandas dataframe in which the activities have been remapped to the (augmented) alphabet + inv_mapping + (if required) Dictionary associating to every letter of the (augmented) alphabet the original activity + """ + if parameters is None: + parameters = {} + + activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY) + return_mapping = exec_utils.get_param_value(Parameters.RETURN_MAPPING, parameters, False) + + activities_count = list(dataframe[activity_key].value_counts().to_dict()) + remap_dict = {} + for index, act in enumerate(activities_count): + result = '' + while index >= 0: + result = chr((index % 26) + ord('A')) + result + index = index // 26 - 1 + remap_dict[act] = result + 
dataframe[activity_key] = dataframe[activity_key].map(remap_dict) + if return_mapping: + inverse_dct = {y: x for x, y in remap_dict.items()} + return dataframe, inverse_dct + return dataframe diff --git a/pm4py/objects/ocel/util/ocel_to_dict_types_rel.py b/pm4py/objects/ocel/util/ocel_to_dict_types_rel.py new file mode 100644 index 000000000..1907bc550 --- /dev/null +++ b/pm4py/objects/ocel/util/ocel_to_dict_types_rel.py @@ -0,0 +1,110 @@ +''' + This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de). + + PM4Py is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + PM4Py is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PM4Py. If not, see . +''' + +from pm4py.objects.ocel.obj import OCEL +from typing import Dict, Union, Tuple +import pandas as pd + + +def apply(ocel: OCEL) -> Dict[str, Dict[Union[str, Tuple[str, str]], pd.DataFrame]]: + """ + Gets from an object-centric event log (OCEL) + a dictionary associating to every event/object/e2o/o2o/change type + a dataframe containing the associated information. + This effectively splits the information of different event/object types + in dense dataframes. 
+ + Running example: + + import pm4py + from pm4py.objects.ocel.util import ocel_to_dict_types_rel + + + ocel = pm4py.read_ocel("tests/input_data/ocel/example_log.jsonocel") + dct_types_rel = ocel_to_dict_types_rel.apply(ocel) + + # prints the dense dataframes for every event type of the log + for evt, table in dct_types_rel["ev_types"].items(): + print("\n") + print(evt) + print(table) + + Parameters + ------------- + ocel + Object-centric event log + + Returns + ----------- + dct_types_rel + Dictionary associating to every type the corresponding dense table. + + Keys at the first level: + - ev_types: pointing to the different event types of the object-centric event log + - obj_types: pointing to the different object types of the object-centric event log + - e2o: pointing to the different event-object relationships of the object-centric event log + - o2o: pointing to the different object-object relationships of the object-centric event log + - changes: pointing to temporal changes in the attributes of the different object types of an OCEL + + Keys at the second level: + - for "ev_types", "obj_types" and "changes": the name of the event/object type related to the dense table + - for "e2o": a tuple in which the first element is an event type, and the second element is an object type + - for "o2o": a tuple in which the two elements are interconnected object types + + Value: a Pandas dataframe (dense table). 
+ """ + ev_types_list = list(ocel.events[ocel.event_activity].unique()) + obj_types_list = list(ocel.objects[ocel.object_type_column].unique()) + e2o_list = list(ocel.relations.groupby([ocel.event_activity, ocel.object_type_column]).size().to_dict()) + + obj_type_map = ocel.objects[[ocel.object_id_column, ocel.object_type_column]].to_dict("records") + obj_type_map = {x[ocel.object_id_column]: x[ocel.object_type_column] for x in obj_type_map} + overall_o2o = ocel.o2o.copy() + overall_o2o[ocel.object_type_column] = overall_o2o[ocel.object_id_column].map(obj_type_map) + overall_o2o[ocel.object_type_column+"_2"] = overall_o2o[ocel.object_id_column+"_2"].map(obj_type_map) + + o2o_list = list(overall_o2o.groupby([ocel.object_type_column, ocel.object_type_column+"_2"]).size().to_dict()) + changes_list = list(ocel.object_changes[ocel.object_type_column].unique()) + + dct_types_rel = {"ev_types": {}, "obj_types": {}, "e2o": {}, "o2o": {}, "changes": {}} + + for evt in ev_types_list: + events = ocel.events[ocel.events[ocel.event_activity] == evt] + events.dropna(axis="columns", how="all") + dct_types_rel["ev_types"][evt] = events + + for objt in obj_types_list: + objects = ocel.objects[ocel.objects[ocel.object_type_column] == objt] + objects.dropna(axis="columns", how="all") + dct_types_rel["obj_types"][objt] = objects + + for e2ot in e2o_list: + e2o = ocel.relations[(ocel.relations[ocel.event_activity] == e2ot[0]) & (ocel.relations[ocel.object_type_column] == e2ot[1])] + e2o.dropna(axis="columns", how="all") + dct_types_rel["e2o"][e2ot] = e2o + + for o2ot in o2o_list: + o2o = overall_o2o[(overall_o2o[ocel.object_type_column] == o2ot[0]) & (overall_o2o[ocel.object_type_column+"_2"] == o2ot[1])] + o2o.dropna(axis="columns", how="all") + dct_types_rel["o2o"][o2ot] = o2o + + for objt in changes_list: + objects = ocel.object_changes[ocel.object_changes[ocel.object_type_column] == objt] + objects.dropna(axis="columns", how="all") + dct_types_rel["changes"][objt] = objects + + 
return dct_types_rel diff --git a/pm4py/ocel.py b/pm4py/ocel.py index f3b8826d6..a17d712d1 100644 --- a/pm4py/ocel.py +++ b/pm4py/ocel.py @@ -25,6 +25,7 @@ from pm4py.objects.ocel.obj import OCEL from pm4py.util import constants import sys +import random def ocel_get_object_types(ocel: OCEL) -> List[str]: @@ -355,13 +356,22 @@ return sampling.sample_ocel_objects(ocel, parameters={"num_entities": num_objects}) -def sample_ocel_connected_components(ocel: OCEL, connected_components: int = 1) -> OCEL: +def sample_ocel_connected_components(ocel: OCEL, connected_components: int = 1, + max_num_events_per_cc: int = sys.maxsize, + max_num_objects_per_cc: int = sys.maxsize, + max_num_e2o_relations_per_cc: int = sys.maxsize) -> OCEL: """ Given an object-centric event log, returns a sampled event log with a subset of the executions. The number of considered connected components need to be specified by the user. + Paper: + Adams, Jan Niklas, et al. "Defining cases and variants for object-centric event data." 2022 4th International Conference on Process Mining (ICPM). IEEE, 2022. + :param ocel: Object-centric event log :param connected_components: Number of connected components to pick from the OCEL + :param max_num_events_per_cc: maximum number of events allowed per connected component (default: sys.maxsize) + :param max_num_objects_per_cc: maximum number of objects allowed per connected component (default: sys.maxsize) + :param max_num_e2o_relations_per_cc: maximum number of event-to-object relationships allowed per connected component (default: sys.maxsize) :rtype: ``OCEL`` .. 
code-block:: python3 @@ -376,10 +386,14 @@ def sample_ocel_connected_components(ocel: OCEL, connected_components: int = 1) events = None objects = None relations = None - ocel_splits = sorted(list(ocel_splits), key=lambda x: (len(x.events), len(x.relations))) - i = 0 - while i < min(connected_components, len(ocel_splits)): - cc = ocel_splits[i] + ocel_splits = [x for x in ocel_splits if + len(x.events) <= max_num_events_per_cc and len(x.objects) <= max_num_objects_per_cc and len( + x.relations) <= max_num_e2o_relations_per_cc] + + if len(ocel_splits) > 0: + ocel_splits = random.sample(ocel_splits, min(connected_components, len(ocel_splits))) + + for cc in ocel_splits: if events is None: events = cc.events objects = cc.objects @@ -388,7 +402,6 @@ def sample_ocel_connected_components(ocel: OCEL, connected_components: int = 1) events = pd.concat([events, cc.events]) objects = pd.concat([objects, cc.objects]) relations = pd.concat([relations, cc.relations]) - i = i + 1 return OCEL(events, objects, relations) diff --git a/pm4py/util/vis_utils.py b/pm4py/util/vis_utils.py index 42ffc4e4c..8eab41abd 100644 --- a/pm4py/util/vis_utils.py +++ b/pm4py/util/vis_utils.py @@ -18,46 +18,55 @@ import os import subprocess import sys +from typing import Optional, Dict + MAX_EDGE_PENWIDTH_GRAPHVIZ = 2.6 MIN_EDGE_PENWIDTH_GRAPHVIZ = 1.0 -def human_readable_stat(timedelta_seconds, stat_locale: dict = {}): +def human_readable_stat(timedelta, stat_locale: Optional[Dict[str, str]] = None) -> str: """ - Transform a timedelta expressed in seconds into a human readable string + Transform a timedelta into a human readable string Parameters ---------- - timedelta_seconds - Timedelta expressed in seconds - stat_locale - Dict mapping stat strings + timedelta + Timedelta Returns ---------- string Human readable string """ - timedelta_seconds = int(float(timedelta_seconds)) - years = timedelta_seconds // 31104000 - months = timedelta_seconds // 2592000 - days = timedelta_seconds // 86400 - hours = 
timedelta_seconds // 3600 % 24 - minutes = timedelta_seconds // 60 % 60 - seconds = timedelta_seconds % 60 - + if stat_locale is None: + stat_locale = {} + + c = int(float(timedelta)) + years = c // 31104000 + months = c // 2592000 + days = c // 86400 + hours = c // 3600 % 24 + minutes = c // 60 % 60 + seconds = c % 60 if years > 0: return str(years) + stat_locale.get("year", "Y") - if months > 0: + elif months > 0: return str(months) + stat_locale.get("month", "MO") - if days > 0: + elif days > 0: return str(days) + stat_locale.get("day", "D") - if hours > 0: + elif hours > 0: return str(hours) + stat_locale.get("hour", "h") - if minutes > 0: + elif minutes > 0: return str(minutes) + stat_locale.get("minute", "m") - return str(seconds) + stat_locale.get("second", "s") + elif seconds > 0: + return str(seconds) + stat_locale.get("second", "s") + else: + c = int(float(timedelta*1000)) + if c > 0: + return str(c) + stat_locale.get("millisecond", "ms") + else: + return str(int(float(timedelta * 10**9))) + stat_locale.get("nanosecond", "ns") def get_arc_penwidth(arc_measure, min_arc_measure, max_arc_measure): diff --git a/pm4py/utils.py b/pm4py/utils.py index 3cf834ba7..3861cf665 100644 --- a/pm4py/utils.py +++ b/pm4py/utils.py @@ -35,7 +35,6 @@ CASE_INDEX_COLUMN = "@@case_index" -@deprecation.deprecated(deprecated_in="2.3.0", removed_in="3.0.0", details="the format_dataframe function does not need application anymore.") def format_dataframe(df: pd.DataFrame, case_id: str = constants.CASE_CONCEPT_NAME, activity_key: str = xes_constants.DEFAULT_NAME_KEY, timestamp_key: str = xes_constants.DEFAULT_TIMESTAMP_KEY, diff --git a/pm4py/visualization/dfg/util/dfg_gviz.py b/pm4py/visualization/dfg/util/dfg_gviz.py index 4eb1430d0..efc143d4a 100644 --- a/pm4py/visualization/dfg/util/dfg_gviz.py +++ b/pm4py/visualization/dfg/util/dfg_gviz.py @@ -239,14 +239,14 @@ def graphviz_visualization(activities_count, dfg, image_format="png", measure="f if start_activities_to_include: 
viz.node("@@startnode", "<●>", shape='circle', fontsize="34") for act in start_activities_to_include: - label = str(start_activities[act]) if isinstance(start_activities, dict) else "" + label = str(start_activities[act]) if isinstance(start_activities, dict) and measure == "frequency" else "" viz.edge("@@startnode", activities_map[act], label=label, fontsize=font_size) if end_activities_to_include: # <■> viz.node("@@endnode", "<■>", shape='doublecircle', fontsize="32") for act in end_activities_to_include: - label = str(end_activities[act]) if isinstance(end_activities, dict) else "" + label = str(end_activities[act]) if isinstance(end_activities, dict) and measure == "frequency" else "" viz.edge(activities_map[act], "@@endnode", label=label, fontsize=font_size) viz.attr(overlap='false') diff --git a/requirements_stable.txt b/requirements_stable.txt index 0503d1153..1ef1b3014 100644 --- a/requirements_stable.txt +++ b/requirements_stable.txt @@ -7,14 +7,14 @@ graphviz==0.20.1 intervaltree==3.1.0 kiwisolver==1.4.4 lxml==4.9.3 -matplotlib==3.7.2 +matplotlib==3.7.1 networkx==3.1 numpy==1.25.1 packaging==23.1 pandas==2.0.3 pillow==10.0.0 pydotplus==2.0.2 -pyparsing==3.1.0 +pyparsing==3.1.1 python-dateutil==2.8.2 pytz==2023.3 scipy==1.11.1 diff --git a/safety_checks/20230728 b/safety_checks/20230728 new file mode 100644 index 000000000..df406eb00 --- /dev/null +++ b/safety_checks/20230728 @@ -0,0 +1,47 @@ ++==============================================================================+ + + /$$$$$$ /$$ + /$$__ $$ | $$ + /$$$$$$$ /$$$$$$ | $$ \__//$$$$$$ /$$$$$$ /$$ /$$ + /$$_____/ |____ $$| $$$$ /$$__ $$|_ $$_/ | $$ | $$ + | $$$$$$ /$$$$$$$| $$_/ | $$$$$$$$ | $$ | $$ | $$ + \____ $$ /$$__ $$| $$ | $$_____/ | $$ /$$| $$ | $$ + /$$$$$$$/| $$$$$$$| $$ | $$$$$$$ | $$$$/| $$$$$$$ + |_______/ \_______/|__/ \_______/ \___/ \____ $$ + /$$ | $$ + | $$$$$$/ + by pyup.io \______/ + ++==============================================================================+ + + REPORT + + 
Safety is using PyUp's free open-source vulnerability database. This +data is 30 days old and limited. + For real-time enhanced vulnerability data, fix recommendations, severity +reporting, cybersecurity support, team and project policy management and more +sign up at https://pyup.io or email sales@pyup.io + + Safety v2.3.5 is scanning for Vulnerabilities... + Scanning dependencies in your files: + + -> requirements_stable.txt + + Using non-commercial database + Found and scanned 25 packages + Timestamp 2023-07-28 13:17:25 + 0 vulnerabilities found + 0 vulnerabilities ignored ++==============================================================================+ + + No known security vulnerabilities found. + ++==============================================================================+ + + Safety is using PyUp's free open-source vulnerability database. This +data is 30 days old and limited. + For real-time enhanced vulnerability data, fix recommendations, severity +reporting, cybersecurity support, team and project policy management and more +sign up at https://pyup.io or email sales@pyup.io + ++==============================================================================+