Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/hotfixes' into release
Browse files Browse the repository at this point in the history
  • Loading branch information
fit-alessandro-berti committed Aug 18, 2023
2 parents e3285e8 + 0de0be4 commit f89f476
Show file tree
Hide file tree
Showing 15 changed files with 188 additions and 21 deletions.
2 changes: 2 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ Also, some filtering techniques are offered on top of object-centric event logs:
* :meth:`pm4py.filtering.filter_ocel_cc_object`; filters a connected component from the object-centric event log to which the object with the provided identifier belongs.
* :meth:`pm4py.filtering.filter_ocel_cc_length`; filter the connected components from an object-centric event log having a number of objects falling in a provided range.
* :meth:`pm4py.filtering.filter_ocel_cc_otype`; filter the connected components from an object-centric event log containing at least one object of the specified object type.
* :meth:`pm4py.filtering.filter_ocel_cc_activity`; filter the connected components from an object-centric event log containing at least one event with the specified activity.

Machine Learning (:mod:`pm4py.ml`)
------------------------------------------
Expand Down Expand Up @@ -550,6 +551,7 @@ Overall List of Methods
pm4py.filtering.filter_ocel_cc_object
pm4py.filtering.filter_ocel_cc_length
pm4py.filtering.filter_ocel_cc_otype
pm4py.filtering.filter_ocel_cc_activity
pm4py.ml
pm4py.ml.split_train_test
pm4py.ml.get_prefixes_from_log
Expand Down
33 changes: 33 additions & 0 deletions examples/log_skeleton_manual_constraints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pm4py


def execute_script():
    """Build a log skeleton by hand (instead of discovering it) and run conformance checking."""
    log = pm4py.read_xes("../tests/input_data/running-example.xes")

    # Start from an empty skeleton: no ordering / equivalence / co-occurrence constraints yet.
    log_skeleton = {key: set() for key in
                    ("always_after", "always_before", "equivalence", "never_together", "directly_follows")}

    # Allow every activity observed in the log to occur between 0 and 10 times per case;
    # without these frequency bounds, conformance checking would flag every event as a deviation.
    log_skeleton["activ_freq"] = {
        act: set(range(11))
        for act in pm4py.get_event_attribute_values(log, "concept:name")
    }

    # Forbid the 'reinitiate request' activity (it may occur exactly 0 times).
    log_skeleton["activ_freq"]["reinitiate request"] = {0}

    # Require 'pay compensation' to occur at some point after the 'decide' activity.
    log_skeleton["always_after"].add(('decide', 'pay compensation'))

    # Per-case diagnostics: the exact deviations of every case in the log.
    detailed_conf_results = pm4py.conformance_log_skeleton(log, log_skeleton)
    print(detailed_conf_results)

    # Summary view: a dataframe reporting the fitness of each case.
    summary_df = pm4py.conformance_log_skeleton(log, log_skeleton, return_diagnostics_dataframe=True)
    print(summary_df)


if __name__ == "__main__":
    execute_script()
55 changes: 55 additions & 0 deletions examples/stochastic_petri_playout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pm4py


def execute_script():
    """Show how stochastic Petri nets are discovered, manually defined and simulated in pm4py."""
    log = pm4py.read_xes("../tests/input_data/running-example.xes", return_legacy_log_object=True)

    # first, we use a traditional process discovery algorithm (the inductive miner) to discover
    # a sound workflow net from the event log
    net, im, fm = pm4py.discover_petri_net_inductive(log)

    # then, we can either define manually the distributions on the stochastic Petri net,
    # or discover them from the log.
    # in the following lines, we automatically discover the stochastic map from the log
    from pm4py.algo.simulation.montecarlo.utils import replay
    smap0 = replay.get_map_from_log_and_net(log, net, im, fm)

    # each transition of the original Petri net is associated to a stochastic variable,
    # having a priority, a weight and a stochastic distribution on the firing times.
    for trans in smap0:
        print("\n")
        print(trans)
        print(dir(smap0[trans]))
        # priority says: if in a marking a transition with higher priority
        # is enabled, it should be considered before all the other transitions
        # with lower priority disregarding the weight
        print(smap0[trans].get_priority())
        # weight sets the probability to fire the transition among all the
        # transitions with the same priority
        print(smap0[trans].get_weight())
        # sets the random variable (independently from the weight)
        print(smap0[trans].random_variable)

    # as an alternative to discovering the stochastic map from the log, we can define it by hand:
    # invisible transitions (and 'register request') get zero firing times, while every other
    # transition's execution time follows a normal distribution with mean 1 and std. deviation 1
    from pm4py.objects.random_variables.normal.random_variable import Normal
    from pm4py.objects.random_variables.constant0.random_variable import Constant0

    smap = {}
    for t in net.transitions:
        if t.label == "register request" or t.label is None:
            v = Constant0()
        else:
            v = Normal(mu=1, sigma=1)
        smap[t] = v

    # eventually, we can use the stochastic Petri net with a specialized algorithm,
    # such as the stochastic playout.
    # fix: pass the manually defined map (smap) built above; the call previously passed
    # the discovered map (smap0), which left smap as dead code and contradicted the
    # "define the map manually" narrative of this example.
    from pm4py.algo.simulation.playout.petri_net.variants import stochastic_playout
    ret_log = stochastic_playout.apply(net, im, fm, parameters={"stochastic_map": smap})
    print(ret_log)


if __name__ == "__main__":
    execute_script()
28 changes: 28 additions & 0 deletions examples/trace_attrib_hierarch_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pm4py
from pm4py.algo.clustering.trace_attribute_driven import algorithm as clust_algorithm


def recursive_print_clusters(cluster_tree, rec_depth=0):
    """Print a hierarchical cluster tree, one indentation level per depth.

    The root node (rec_depth == 0) is not printed; leaf nodes are followed by
    an 'END' marker so the bottom of each branch is explicit.
    """
    if rec_depth > 0:
        # print the values belonging to the current part of the tree
        # (this allows filtering the event log on a given part of the tree)
        print("\t" * rec_depth, cluster_tree["name"].split("-"))

    children = cluster_tree["children"]
    for subtree in children:
        recursive_print_clusters(subtree, rec_depth + 1)

    if not children:
        # no children: explicitly mark the end of this branch
        print("\t" * (rec_depth + 1), "END")


def execute_script():
    """Cluster a sampled log on the 'responsible' trace attribute and print the tree."""
    event_log = pm4py.read_xes("../tests/input_data/receipt.xes", return_legacy_log_object=True)
    # keep the example fast by clustering only a small sample of cases
    event_log = pm4py.sample_cases(event_log, num_cases=20)
    # perform hierarchical clustering on the 'responsible' attribute of the log;
    # the algorithm returns the cluster tree as the first element of its result
    tree_root = clust_algorithm.apply(event_log, "responsible")[0]
    recursive_print_clusters(tree_root)


if __name__ == "__main__":
    execute_script()
2 changes: 1 addition & 1 deletion pm4py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
filter_ocel_object_types_allowed_activities, filter_ocel_object_per_type_count, filter_ocel_start_events_per_object_type, \
filter_ocel_end_events_per_object_type, filter_ocel_events_timestamp, filter_prefixes, filter_suffixes, \
filter_four_eyes_principle, filter_activity_done_different_resources, filter_ocel_events, filter_ocel_objects, \
filter_ocel_object_types, filter_ocel_cc_object, filter_ocel_cc_length, filter_ocel_cc_otype
filter_ocel_object_types, filter_ocel_cc_object, filter_ocel_cc_length, filter_ocel_cc_otype, filter_ocel_cc_activity
from pm4py.discovery import discover_petri_net_alpha, discover_petri_net_alpha_plus, discover_petri_net_ilp, discover_petri_net_heuristics, \
discover_petri_net_inductive, discover_process_tree_inductive, discover_heuristics_net, \
discover_dfg, discover_footprints, discover_eventually_follows_graph, discover_directly_follows_graph, discover_bpmn_inductive, \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None):
feature_names = ["@@ocel_lif_activity_"+str(x) for x in activities]

for obj in ordered_objects:
lif = lifecycle[obj]
data.append([])
if obj in lifecycle:
lif = lifecycle[obj]
else:
lif = []
for act in activities:
data[-1].append(len(list(x for x in lif if x == act)))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None):
feature_names = ["@@object_lifecycle_duration", "@@object_lifecycle_start_timestamp", "@@object_lifecycle_end_timestamp"]

for obj in ordered_objects:
se = first_object_timestamp[obj].timestamp()
ee = last_object_timestamp[obj].timestamp()
data.append([ee - se, se, ee])
if obj in first_object_timestamp:
se = first_object_timestamp[obj].timestamp()
ee = last_object_timestamp[obj].timestamp()
data.append([ee - se, se, ee])
else:
data.append([0, 0, 0])

return data, feature_names
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None):
feature_names = ["@@object_lifecycle_length"]

for obj in ordered_objects:
data.append([lifecycle_length[obj]])
if obj in lifecycle_length:
data.append([lifecycle_length[obj]])
else:
data.append([0])

return data, feature_names
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None):
feature_names = ["@@ocel_lif_path_"+str(x) for x in all_paths]

for obj in ordered_objects:
lif = paths[obj]
lif = paths[obj] if obj in paths else []
data.append([])
for p in all_paths:
data[-1].append(len(list(x for x in lif if x == p)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None):
feature_names = ["@@object_lifecycle_unq_act"]

for obj in ordered_objects:
data.append([lifecycle_unq[obj]])
if obj in lifecycle_unq:
data.append([lifecycle_unq[obj]])
else:
data.append([0])

return data, feature_names
39 changes: 39 additions & 0 deletions pm4py/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,3 +1192,42 @@ def filter_ocel_cc_otype(ocel: OCEL, otype: str, positive: bool = True) -> OCEL:
objs = [y for x in conn_comp for y in x]

return filter_ocel_objects(ocel, objs)


def filter_ocel_cc_activity(ocel: OCEL, activity: str, positive: bool = True) -> OCEL:
    """
    Filters the objects belonging to the connected components having at least an event
    with the provided activity.

    Paper:
    Adams, Jan Niklas, et al. "Defining cases and variants for object-centric event data." 2022 4th International Conference on Process Mining (ICPM). IEEE, 2022.

    :param ocel: object-centric event log
    :param activity: activity
    :param positive: keeps the objects of the matching components when True (default, original behavior); keeps the objects of the non-matching components when False (mirrors ``filter_ocel_cc_otype``)
    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel('log.jsonocel')
        filtered_ocel = pm4py.filter_ocel_cc_activity(ocel, 'Create Order')
    """
    # identifiers of the events carrying the requested activity
    evs = set(ocel.events[ocel.events[ocel.event_activity] == activity][ocel.event_id_column])
    # objects related to at least one of those events
    objs = set(ocel.relations[ocel.relations[ocel.event_id_column].isin(evs)][ocel.object_id_column].unique())

    from pm4py.algo.transformation.ocel.graphs import object_interaction_graph
    import networkx as nx

    # build an undirected graph over the object-interaction pairs
    g0 = object_interaction_graph.apply(ocel)
    g = nx.Graph()

    for edge in g0:
        g.add_edge(edge[0], edge[1])

    # NOTE(review): objects that never interact with any other object have no edge,
    # hence no component, and are dropped regardless of `positive` — confirm intended.
    conn_comp = list(nx.connected_components(g))
    if positive:
        conn_comp = [x for x in conn_comp if len(set(x).intersection(objs)) > 0]
    else:
        conn_comp = [x for x in conn_comp if len(set(x).intersection(objs)) == 0]

    objs = [y for x in conn_comp for y in x]

    return filter_ocel_objects(ocel, objs)
19 changes: 8 additions & 11 deletions pm4py/objects/conversion/process_tree/variants/to_bpmn.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,18 +190,15 @@ def recursively_add_tree(parent_tree, tree, bpmn, initial_event, final_event, co
final_connector = final_connect

elif tree.operator == Operator.LOOP:
if len(tree_childs) != 2:
raise Exception("Loop doesn't have 2 childs")
else:
do = tree_childs[0]
redo = tree_childs[1]
bpmn, split, join, counts = add_xor_gateway(bpmn, counts)
bpmn, counts, i, y = recursively_add_tree(tree, do, bpmn, join, split, counts, rec_depth + 1)
do = tree_childs[0]
bpmn, split, join, counts = add_xor_gateway(bpmn, counts)
bpmn, counts, i, y = recursively_add_tree(tree, do, bpmn, join, split, counts, rec_depth + 1)
for redo in tree_childs[1:]:
bpmn, counts, x, y = recursively_add_tree(tree, redo, bpmn, split, join, counts, rec_depth + 1)
bpmn.add_flow(BPMN.Flow(initial_event, join))
bpmn.add_flow(BPMN.Flow(split, final_event))
initial_connector = join
final_connector = split
bpmn.add_flow(BPMN.Flow(initial_event, join))
bpmn.add_flow(BPMN.Flow(split, final_event))
initial_connector = join
final_connector = split

return bpmn, counts, initial_connector, final_connector

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def apply(net, im, fm, parameters=None):

if len(grouped_net.transitions) == 1:
pt_str = list(grouped_net.transitions)[0].label
pt = pt_util.parse(pt_str)
pt = pt_operator.ProcessTree(operator=None, label=pt_str)
ret = pt_util.fold(pt) if fold else pt
tree_sort(ret)
return ret
Expand Down
1 change: 1 addition & 0 deletions pm4py/objects/ocel/importer/sqlite/variants/ocel20.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
object_changes = object_changes.sort_values([event_timestamp, internal_index])
del object_changes[internal_index]

E2O.dropna(inplace=True, subset=[event_id, event_activity, event_timestamp, object_id, object_type])
ocel = OCEL(events=event_types_coll, objects=objects, relations=E2O, object_changes=object_changes, o2o=O2O, parameters=parameters)
ocel = ocel_consistency.apply(ocel, parameters=parameters)
ocel = filtering_utils.propagate_relations_filtering(ocel, parameters=parameters)
Expand Down
2 changes: 1 addition & 1 deletion requirements_stable.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pydotplus==2.0.2
pyparsing==3.1.1
python-dateutil==2.8.2
pytz==2023.3
scipy==1.11.1
scipy==1.11.2
six==1.16.0
sortedcontainers==2.4.0
stringdist==1.0.9
Expand Down

0 comments on commit f89f476

Please sign in to comment.