Skip to content

Commit

Permalink
Raise error when resolving variable UUIDs to display names with dupli…
Browse files Browse the repository at this point in the history
…cates. (#34)
  • Loading branch information
daniel-k authored Oct 2, 2024
1 parent f37ff83 commit 3d2d564
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 4 deletions.
3 changes: 3 additions & 0 deletions docs/errors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ Errors

.. autoclass:: ResamplingValidationError
:show-inheritance:

.. autoclass:: DuplicateDisplayNameError
:show-inheritance:
10 changes: 10 additions & 0 deletions src/enlyze/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,13 @@ class ResamplingValidationError(EnlyzeError):
resampling interval is specified.
"""


class DuplicateDisplayNameError(EnlyzeError):
    """Variables with duplicate display names.

    Raised when variable UUIDs are resolved to display names (for example by
    requesting ``use_display_names=True``) and the result would be ambiguous
    because multiple variables have the same display name. You should either
    fix the duplicate variable display names via the ENLYZE App or avoid
    requesting those variables at the same time.
    """
25 changes: 22 additions & 3 deletions src/enlyze/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pandas

from enlyze.errors import DuplicateDisplayNameError
from enlyze.schema import dataframe_ensure_schema


Expand Down Expand Up @@ -143,6 +144,19 @@ def _display_names_as_column_names(self, columns: list[str]) -> list[str]:
if var.display_name
}

distinct_display_names = set(uuid_to_display_name.values())
if len(uuid_to_display_name) != len(distinct_display_names):
maybe_duplicate_display_names = list(uuid_to_display_name.values())
for name in distinct_display_names:
maybe_duplicate_display_names.remove(name)

raise DuplicateDisplayNameError(
", ".join(
f"'{duplicate_display_name}'"
for duplicate_display_name in set(maybe_duplicate_display_names)
)
)

return [uuid_to_display_name.get(var_uuid, var_uuid) for var_uuid in columns]

def to_dicts(self, use_display_names: bool = False) -> Iterator[dict[str, Any]]:
Expand All @@ -153,12 +167,14 @@ def to_dicts(self, use_display_names: bool = False) -> Iterator[dict[str, Any]]:
<python:datetime-naive-aware>` :py:class:`datetime.datetime` localized in UTC.
:param use_display_names: Whether to return display names instead of variable
UUIDs. If there is no display name fall back to UUID.
UUIDs. If there is no display name, fall back to UUID.
:raises: :exc:`~enlyze.errors.DuplicateDisplayNameError` when duplicate
display names would be returned instead of UUIDs.
:returns: Iterator over rows
"""

time_column, *variable_columns = self._columns

if use_display_names:
Expand All @@ -181,7 +197,10 @@ def to_dataframe(self, use_display_names: bool = False) -> pandas.DataFrame:
represented as a column named by its UUID.
:param use_display_names: Whether to return display names instead of variable
UUIDs. If there is no display name fall back to UUID.
UUIDs. If there is no display name, fall back to UUID.
:raises: :exc:`~enlyze.errors.DuplicateDisplayNameError` when duplicate
display names would be returned instead of UUIDs.
:returns: DataFrame with timeseries data indexed by time
Expand Down
24 changes: 23 additions & 1 deletion tests/enlyze/test_models.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from dataclasses import replace
from datetime import datetime
from uuid import uuid4

import hypothesis.strategies as st
import pytest
from hypothesis import given

from enlyze.models import ProductionRun, ProductionRuns
from enlyze.errors import DuplicateDisplayNameError
from enlyze.models import ProductionRun, ProductionRuns, TimeseriesData, Variable


@given(runs=st.lists(st.from_type(ProductionRun), max_size=10))
Expand All @@ -28,3 +32,21 @@ def test_production_runs_to_dataframe_no_empty_columns_for_optional_dataclasses(

assert "quantity_total" not in df.columns
assert "average_throughput" in df.columns


@given(variable=st.builds(Variable, display_name=st.text(min_size=1)))
def test_timeseries_data_duplicate_display_names(variable):
    """Two variables sharing a display name must not be resolvable by name."""
    # Clone the generated variable under a fresh UUID: the UUIDs stay distinct
    # while the (non-empty) display name is shared by both variables.
    twin = replace(variable, uuid=uuid4())
    colliding_variables = [variable, twin]
    column_names = ["time"] + [str(v.uuid) for v in colliding_variables]

    data = TimeseriesData(
        start=datetime.now(),
        end=datetime.now(),
        variables=colliding_variables,
        _columns=column_names,
        _records=[],
    )

    with pytest.raises(DuplicateDisplayNameError):
        data.to_dataframe(use_display_names=True)

0 comments on commit 3d2d564

Please sign in to comment.