Skip to content

Commit

Permalink
Merge branch '173-rustxes-xes-importer-variant' into 'integration'
Browse files Browse the repository at this point in the history
[Priority 2] RUSTXES XES importer variant

Closes #173

See merge request process-mining/pm4py/pm4py-core!1150
  • Loading branch information
fit-alessandro-berti committed Nov 4, 2023
2 parents 63e9010 + cfda2c2 commit b838ff3
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 1 deletion.
5 changes: 4 additions & 1 deletion pm4py/objects/log/importer/xes/importer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import importlib.util
from enum import Enum

from pm4py.objects.log.importer.xes.variants import iterparse, line_by_line, iterparse_mem_compressed, iterparse_20, chunk_regex
from pm4py.objects.log.importer.xes.variants import iterparse, line_by_line, iterparse_mem_compressed, iterparse_20, chunk_regex, rustxes


class Variants(Enum):
Expand All @@ -10,6 +10,7 @@ class Variants(Enum):
ITERPARSE_MEM_COMPRESSED = iterparse_mem_compressed
ITERPARSE_20 = iterparse_20
CHUNK_REGEX = chunk_regex
RUSTXES = rustxes


if importlib.util.find_spec("lxml"):
Expand Down Expand Up @@ -55,6 +56,8 @@ def apply(path, parameters=None, variant=DEFAULT_VARIANT):
variant = Variants.ITERPARSE_20
elif variant == "iterparse_mem_compressed":
variant = Variants.ITERPARSE_MEM_COMPRESSED
elif variant == "rustxes":
variant = Variants.RUSTXES

log = variant.value.apply(path, parameters=parameters)

Expand Down
31 changes: 31 additions & 0 deletions pm4py/objects/log/importer/xes/variants/rustxes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from enum import Enum
from pm4py.util import exec_utils
from typing import Optional, Dict, Any, Union
from pm4py.objects.log.obj import EventLog
from pm4py.objects.conversion.log import converter as log_converter
import pandas as pd
from copy import copy


class Parameters(Enum):
    """Parameter keys understood by the rustxes XES importer variant."""

    # Flag read by apply() (default True there): when truthy, the imported
    # dataframe is converted to an event log object before being returned.
    RETURN_LEGACY_LOG_OBJECT = "return_legacy_log_object"


def apply(log_path: str, parameters: Optional[Dict[Any, Any]] = None) -> Union[EventLog, pd.DataFrame]:
    """
    Imports a XES file through the rustxes library.

    The file is read with ``rustxes.import_xes``; the first element of its
    result is turned into a pandas dataframe via ``to_pandas()``. Depending
    on the parameters, that dataframe is either returned directly or
    converted with the log converter (``TO_EVENT_LOG`` variant).

    Parameters
    -------------
    log_path
        Path of the XES file, passed unchanged to ``rustxes.import_xes``
    parameters
        Optional dictionary of parameters, including:
            Parameters.RETURN_LEGACY_LOG_OBJECT -> when truthy (default: True),
            the dataframe is converted to an event log object; otherwise the
            dataframe itself is returned

    Returns
    -------------
    log
        The converted event log object, or the pandas dataframe
    """
    options = {} if parameters is None else parameters

    wants_legacy_object = exec_utils.get_param_value(Parameters.RETURN_LEGACY_LOG_OBJECT, options, True)

    # Imported lazily, so the dependency is only needed when this variant runs.
    import rustxes

    imported = rustxes.import_xes(log_path)
    # Only the first element of the import result is used here.
    dataframe = imported[0].to_pandas()

    if not wants_legacy_object:
        return dataframe

    # Work on a shallow copy so the caller's dictionary is left untouched
    # when the post-processing flag is set for the conversion.
    conversion_parameters = copy(options)
    conversion_parameters["stream_postprocessing"] = True

    return log_converter.apply(
        dataframe,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=conversion_parameters,
    )
2 changes: 2 additions & 0 deletions pm4py/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def read_xes(file_path: str, variant: str = "lxml", return_legacy_log_object: bo
v = xes_importer.Variants.LINE_BY_LINE
elif variant == "chunk_regex":
v = xes_importer.Variants.CHUNK_REGEX
elif variant == "rustxes":
v = xes_importer.Variants.RUSTXES

from copy import copy
parameters = copy(kwargs)
Expand Down
8 changes: 8 additions & 0 deletions tests/xes_impexp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ def test_importXESfromGZIP_imp2(self):
log = xes_importer.apply(os.path.join(COMPRESSED_INPUT_DATA, "01_running-example.xes.gz"))
del log

def test_rustxes_xes_import(self):
    """Import receipt.xes with the RUSTXES variant and check the log length."""
    xes_path = os.path.join(INPUT_DATA_DIR, "receipt.xes")
    imported_log = xes_importer.apply(xes_path, variant=xes_importer.Variants.RUSTXES)
    self.assertEqual(len(imported_log), 1434)

def test_rustxes_xesgz_import(self):
    """Import the gzipped bpic2012 log with the RUSTXES variant and check the log length."""
    gz_path = os.path.join(INPUT_DATA_DIR, "bpic2012.xes.gz")
    imported_log = xes_importer.apply(gz_path, variant=xes_importer.Variants.RUSTXES)
    self.assertEqual(len(imported_log), 13087)


if __name__ == "__main__":
unittest.main()

0 comments on commit b838ff3

Please sign in to comment.