From 97d3f1932a19a196522cc3be596fef2829787ffe Mon Sep 17 00:00:00 2001 From: Yusuke Tsutsumi Date: Thu, 20 Oct 2022 22:57:39 -0700 Subject: [PATCH 1/2] make strategy_append_unique work for unhashables strategy_append_unique does not work when determining uniqueness of unhashable objects. Adding some rudimentary support for the use case. repr() was chosen as the fallback as it's theoretically supposed to be a string which resolves to the object when executed, which effectively makes it a string that one could use for uniqueness. --- deepmerge/extended_set.py | 28 +++++++++++++++++++++++++++ deepmerge/strategy/list.py | 4 ++-- deepmerge/tests/strategy/test_list.py | 11 +++++++++++ docs/guide.rst | 13 +++++++++++-- 4 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 deepmerge/extended_set.py diff --git a/deepmerge/extended_set.py b/deepmerge/extended_set.py new file mode 100644 index 0000000..e7f5e48 --- /dev/null +++ b/deepmerge/extended_set.py @@ -0,0 +1,28 @@ +class ExtendedSet(set): + """ + ExtendedSet is an extension of set, which allows for usage + of types that are typically not allowed in a set + (e.g. unhashable). 
+ + The following types that cannot be used in a set are supported: + + - unhashable types + """ + + def __init__(self, elements): + self._values_by_hash = { + self._hash(e): e for e in elements + } + + + def _insert(self, element): + self._values_by_hash[self._hash(element)] = element + + def _hash(self, element): + if getattr(element, "__hash__") is not None: + return hash(element) + else: + return hash(str(element)) + + def __contains__(self, obj): + return self._hash(obj) in self._values_by_hash \ No newline at end of file diff --git a/deepmerge/strategy/list.py b/deepmerge/strategy/list.py index ca42828..62b37bc 100644 --- a/deepmerge/strategy/list.py +++ b/deepmerge/strategy/list.py @@ -1,5 +1,5 @@ from .core import StrategyList - +from ..extended_set import ExtendedSet class ListStrategies(StrategyList): """ @@ -26,5 +26,5 @@ def strategy_append(config, path, base, nxt): @staticmethod def strategy_append_unique(config, path, base, nxt): """append items without duplicates in nxt to base.""" - base_as_set = set(base) + base_as_set = ExtendedSet(base) return base + [n for n in nxt if n not in base_as_set] diff --git a/deepmerge/tests/strategy/test_list.py b/deepmerge/tests/strategy/test_list.py index 39215a9..6f51f24 100644 --- a/deepmerge/tests/strategy/test_list.py +++ b/deepmerge/tests/strategy/test_list.py @@ -19,3 +19,14 @@ def test_strategy_append_unique(custom_merger): expected = [1, 3, 2, 5, 4] actual = custom_merger.merge(base, nxt) assert actual == expected + +def test_strategy_append_unique_nested_dict(custom_merger): + """append_unique should work even with unhashable objects + Like dicts. 
+ """ + base = [{"bar": ["bob"]}] + nxt = [{"bar": ["baz"]}] + + result = custom_merger.merge(base, nxt) + + assert result == [{"bar": ["bob"]}, {"bar": ["baz"]}] \ No newline at end of file diff --git a/docs/guide.rst b/docs/guide.rst index 39f414a..a13bb01 100644 --- a/docs/guide.rst +++ b/docs/guide.rst @@ -10,7 +10,7 @@ it's recommended to choose your own strategies, deepmerge does provided some preconfigured mergers for a common situations: * deepmerge.always_merger: always try to merge. in the case of mismatches, the value from the second object overrides the first o ne. -* deepmerge.merge_or_raise: try to merge, raise an exception if an unmergable situation is encountered. +* deepmerge.merge_or_raise: try to merge, raise an exception if an unmergable situation is encountered. * deepmerge.conservative_merger: similar to always_merger, but in the case of a conflict, use the existing value. Once a merger is constructed, it then has a merge() method that can be called: @@ -33,7 +33,6 @@ Once a merger is constructed, it then has a merge() method that can be called: Merges are Destructive ====================== - You may have noticed from the example, but merging is a destructive behavior: it will modify the first argument passed in (the base) as part of the merge. This is intentional, as an implicit copy would result in a significant performance slowdown for deep data structures. If you need to keep the original objects unmodified, you can use the deepcopy method: @@ -96,3 +95,13 @@ Example: If a strategy fails, an exception should not be raised. This is to ensure it can be chained with other strategies, or the fall-back. +Uniqueness of elements when merging +=================================== + +Some strategies require determining the uniqueness +of the elements. Since deepmerge primarily deals with nested +types, this includes structures that are not hashable such as +dictionaries. 
+ +In those cases, built-in deepmerge strategies will call str() +on the object and hash that value instead. \ No newline at end of file From 40239c0d33aa1fa87dc21ca81925de7a02c05ac6 Mon Sep 17 00:00:00 2001 From: Yusuke Tsutsumi Date: Mon, 24 Oct 2022 22:14:59 -0700 Subject: [PATCH 2/2] updating black black had a traceback where it couldn't find _unicodefun from click. Updating black resolves the issue. Updating formatting that comes with it. --- Makefile | 2 +- deepmerge/extended_set.py | 7 ++----- deepmerge/strategy/list.py | 1 + deepmerge/tests/strategy/test_list.py | 3 ++- docs/conf.py | 18 +++++++++--------- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 9a611ee..75ba49c 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ build: .venv/deps # only works with python 3+ lint: .venv/deps - .venv/bin/python -m pip install black==21.12b0 + .venv/bin/python -m pip install black==22.3.0 .venv/bin/python -m black --check . test: .venv/deps diff --git a/deepmerge/extended_set.py b/deepmerge/extended_set.py index e7f5e48..1d51b43 100644 --- a/deepmerge/extended_set.py +++ b/deepmerge/extended_set.py @@ -10,10 +10,7 @@ class ExtendedSet(set): """ def __init__(self, elements): - self._values_by_hash = { - self._hash(e): e for e in elements - } - + self._values_by_hash = {self._hash(e): e for e in elements} def _insert(self, element): self._values_by_hash[self._hash(element)] = element @@ -25,4 +22,4 @@ def _hash(self, element): return hash(str(element)) def __contains__(self, obj): - return self._hash(obj) in self._values_by_hash \ No newline at end of file + return self._hash(obj) in self._values_by_hash diff --git a/deepmerge/strategy/list.py b/deepmerge/strategy/list.py index 62b37bc..2e42519 100644 --- a/deepmerge/strategy/list.py +++ b/deepmerge/strategy/list.py @@ -1,6 +1,7 @@ from .core import StrategyList from ..extended_set import ExtendedSet + class ListStrategies(StrategyList): """ Contains the strategies provided 
for lists. diff --git a/deepmerge/tests/strategy/test_list.py b/deepmerge/tests/strategy/test_list.py index 6f51f24..7eb2d3b 100644 --- a/deepmerge/tests/strategy/test_list.py +++ b/deepmerge/tests/strategy/test_list.py @@ -20,6 +20,7 @@ def test_strategy_append_unique(custom_merger): actual = custom_merger.merge(base, nxt) assert actual == expected + def test_strategy_append_unique_nested_dict(custom_merger): """append_unique should work even with unhashable objects Like dicts. @@ -29,4 +30,4 @@ def test_strategy_append_unique_nested_dict(custom_merger): result = custom_merger.merge(base, nxt) - assert result == [{"bar": ["bob"]}, {"bar": ["baz"]}] \ No newline at end of file + assert result == [{"bar": ["bob"]}, {"bar": ["baz"]}] diff --git a/docs/conf.py b/docs/conf.py index ee1edbc..df0dc4d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,18 +52,18 @@ master_doc = "index" # General information about the project. -project = u"deepmerge" -copyright = u"2016, Yusuke Tsutsumi" -author = u"Yusuke Tsutsumi" +project = "deepmerge" +copyright = "2016, Yusuke Tsutsumi" +author = "Yusuke Tsutsumi" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = u"0.1" +version = "0.1" # The full version, including alpha/beta/rc tags. -release = u"0.1" +release = "0.1" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -271,8 +271,8 @@ ( master_doc, "deepmerge.tex", - u"deepmerge Documentation", - u"Yusuke Tsutsumi", + "deepmerge Documentation", + "Yusuke Tsutsumi", "manual", ), ] @@ -308,7 +308,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [(master_doc, "deepmerge", u"deepmerge Documentation", [author], 1)] +man_pages = [(master_doc, "deepmerge", "deepmerge Documentation", [author], 1)] # If true, show URL addresses after external links. # @@ -324,7 +324,7 @@ ( master_doc, "deepmerge", - u"deepmerge Documentation", + "deepmerge Documentation", author, "deepmerge", "One line description of project.",