From f0eb9d2aac33655722a735e690647088b3517d1a Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Wed, 28 Aug 2024 17:27:45 -0400 Subject: [PATCH 1/9] Draft version; the relevant code is in sandbox.py, where it will remain until finished. --- sandbox.py | 439 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 439 insertions(+) create mode 100644 sandbox.py diff --git a/sandbox.py b/sandbox.py new file mode 100644 index 00000000000..5b18ff55112 --- /dev/null +++ b/sandbox.py @@ -0,0 +1,439 @@ +# Goal: Create a program that can separate Jsons out into diffs. Here's what I need: + +# I need to know all keys added in the second commit. (Keys of set B that are not in set A.) +# I need all keys deleted in that commit. (Keys of A that are not in the set of keys of B.) +# I need all keys changed. Now, changes can have specific qualities, based on the types of key: +# Potential types of the key: Single Value (Boolean, String, Number, null)-- these can only be deleted or changed. +# Array (convert the values into sets, and compare them.) +# Dict: Recurse. +# Therefore, I need to define a few elements/operations. INSERT KEY (value) and DELETE KEY( value). +# +# So, let's examine this in cases. What does it actually mean to undo an action? We have 3 types of values. +# STAGES FOR UNDOING: +# undoing an add +# -simple case: delete if not changed or deleted in a later diff. +# -add an element to a list- search that list for that element, and if it's there, delete it. +# - adding a dictionary- delete all keys that were added with the new dict. If said dict is then empty, delete it. +# undoing a delete: +# simple case: add back the key:value pair that was deleted, if it was not added again by a later dict +# deleted from a list: append that element back to the list. +# deleted a key from a dict: add that key and value back, if it does not already exist. +# undoing a replace +# simple case: return to the previous version's value +# lists: ???? 
if all goes well, a list changing should never occur; it will only have elements of it being added or deleted. +# dicts: revert all key:Values within the dict back to normal, if they are not touched by the later commit. +# Replace: +# Do I want a recursive definition? Quite possibly. After all, if it's recursive, it will properly allow me to deal with all sorts of potential structures. Now, problem-- how do I handle this data? +# How do I track the paths properly? +# Thought -> Pass the appropriate prefix into the function. Then-- Okay, let's just Haskell it. +# On internal dict: [f'{prefix}/{newKey}' for newKey in diffDict()] + + +# Stages for recording changes: Simple keys/values, easy. +# Let's see about lists now. We have two types, ordered and unordered. This will be tricky-ish. How will I accomplish this? Let's assume that Patches aren't (usually) manually created, +# and usually only come from diff_dicts, and the like. This way, we have an easy pipeline to work with, and can potentially ignore some inconveniences. +# So let's see. Desired behavior when performing the Add operation: +# - If adding a simple key value, check if the key already exists. If it doesn't exist, add it. +# - If adding a value to a list, check to see if there's an index argument. If not, append it to the end of the list. +# - If navigating to a value that doesn't exist, create keys on the way to what doesn't exist. This begs the question, should it be a different type +# to add to arrays? I think maybe, yes actually-- I like the idea of that. +# Yeah, the elegant way to do this is to remove Replace, and instead use a variety of insert keys, but is that invertible? Let's think about it this way. +# - Is the insert method invertible? It certainly is, it is undone by pop. +# Alright, I think I need to bite the bullet, and use the 'path' method. Okay. This way, each add is responsible for a single data point. +# This is a pitfall. 
Alright, I think I need to be careful, because the problem with this, is that 'adding to a dict is an intrinsically separate operation from adding to a list. +# But both use the same way of indexing in. How can I avoid ambiguity, here? Do I need to avoid it? +# Let's say that I'm adding to it with a key. +# Alright, back to breaking down the types of adding. +# - Adding a dict key and value. +# - Adding an element to a list (orderless) +# - Adding an element to a list (ordered) +from copy import deepcopy +from dataclasses import dataclass +from types import MappingProxyType +from typing import Literal + + +@dataclass +class Change: + def __init__( + self, path: str, value: str | list | int = "", old_value: str | list | int = "" + ): + self.path = path + self.old_value = old_value + self.value = value + + def apply(self, doc: dict): + pass + + def invert(self): + pass + + +# Inverting an Add means that it deletes. +class Add(Change): + def __init__( + self, path: str, value: str | list | int = "", old_value: str | list | int = "" + ): + super().__init__(path, value, old_value) + + def apply(self, document: dict) -> dict: + doc = deepcopy(document) + entry = doc + key_list = self.path.split("/") + for key in key_list[:-1]: + try: + entry = entry[key] + except: + entry[key] = {} + entry = entry[key] + # If the value is a list, and there exists a key:value pair at the full end of the path, we want to instead + # add the elements of the value into the list. + if isinstance(entry, list): + if key_list[-1] == "*" or int(key_list[-1]) > len(entry) - 1: + entry.append(self.value) + else: + entry.insert(int(key_list[-1]), self.value) + else: + entry.setdefault(key_list[-1], self.value) + return doc + + def invert(self): + return Delete(self.path, "", old_value=self.value) + + def __repr__(self): + return f'Add({self.path}, {self.value}, {self.old_value})' + + +# The Delete class does not need a value, normally. 
However, when deleting elements from a list, the value equals +# the value of the element to be deleted. +# So, Delete's path needs to record the index for re-adding. Does this mean a recursive function is best????? +# Add's information: key, value, and path. The path for a list includes the index after the /. +# Delete's old value is the value that was deleted. This is not necessary when it has a singular value, as in deleting an element from a list. +class Delete(Change): + def __init__( + self, path: str, value: str | list | int = "", old_value: str | list | int = "" + ): + super().__init__(path, value, old_value) + + def apply(self, document: dict | list): + doc = deepcopy(document) + entry = doc + key_list = self.path.split("/") + for key in key_list[:-1]: + try: + entry = entry[key] + except: + return doc + # If the value is a list, then we want to delete based off of the index provided at the end of the path. + if isinstance(entry, list): + if key_list[-1] == "*" or int(key_list[-1]) > len(entry) - 1: + entry.pop() + else: + entry.pop(int(key_list[-1])) + else: + try: + entry.pop(key_list[-1]) + except: + pass + return doc + + def invert(self): + return Add(self.path, self.value) + + def __repr__(self): + return f'Delete({self.path}, {self.value}, {self.old_value})' + + +@dataclass +class Patch: + + def __init__(self, change_list: list[Change] | None = None): + self.change_list = change_list if change_list else [] + + def append(self, change: Change): + self.change_list.append(change) + + # Python won't let me typehint this as a 'Patch' object. + def extend(self, changes): + self.change_list.extend(changes.change_list) + + def apply(self, doc): + changed_doc = doc + for change in self.change_list: + change.apply(changed_doc) + return changed_doc + + # out of place operation, returns a new Patch object. 
+ def invert(self): + return Patch([change.invert() for change in reversed(self.change_list)]) + + def __repr__(self): + return f'Patch({self.change_list!s})' + + +def find_key_by_value(find_value, index: dict): + for key, value in index.items(): + if value == find_value: + return key + return None + + +def diff_unordered_list(list1: list, list2: list, path: str = "") -> Patch: + item_id_to_count: dict = {} + item_id_to_item: dict = {} + item_id: int = 0 + change_list = Patch([]) + for elem in list2: + if elem not in item_id_to_item.values(): + item_id_to_item[item_id] = elem + item_id += 1 + if item_id := find_key_by_value(elem, item_id_to_item): + item_id_to_count[item_id] = item_id_to_count.get(item_id, 0) + 1 + for elem in list1: + if elem not in item_id_to_item.values(): + item_id_to_item[item_id] = elem + item_id += 1 + if item_id := find_key_by_value(elem, item_id_to_item): + item_id_to_count[item_id] = item_id_to_count.get(item_id, 0) - 1 + + for key, value in item_id_to_count.items(): + if value > 0: + for i in range(value): + change_list.append(Add(f'{path}/*', key)) + elif value < 0: + for i in range(abs(value)): + change_list.append(Delete(f'{path}/*', value="", old_value=key)) + return change_list + + +# What do I prioritize, in +def diff_ordered_list(input1: list, input2: list, path: str = "") -> Patch: + changelist = Patch([]) + shared_values = find_shared_sequences(input1, input2) + deleted_indices = [x for x in range(len(input1)) if x not in shared_values[0]] + added_indices = [y for y in range(len(input2)) if y not in shared_values[1]] + for i in range(len(deleted_indices)): + changelist.append( + Delete( + f'{path + "/" if path else ""}{deleted_indices[i] -i}', + "", + old_value=input1[deleted_indices[i]], + ) + ) + for addition in added_indices: + changelist.append( + Add(f'{path + "/" if path else ""}{addition}', input2[addition]) + ) + return changelist + + +# Alright, this is a problem. 
From the microscopic view (only seeing if two indices of a list are different), there's +# no actual way to determine if the operation was +# an add or a delete. This means that any solution I have will have to +# examine other pieces of the list, as well. This means a recursive method might not be ideal. +# Now, here's a thought. Okay. Let's actually define a 'Swap' change, maybe? No, alright, this is going to be a problem. Let's figure it out. +# So my current issue has to do with figuring out a way to efficiently diff sections of a list. +# step 1, match if the same, adding the index object to the shared_values array. +# step 2, if not the same, increment the right until they are the same, and continue. If right goes over the end of the second list, reset it and advance the left instead. +# step 3: To handle duplicates; do I need to run this multiple times, for each possible configuration? That feels like the best way, but also like it could get +# expensive to manage. +# Alright, let's think of it this way. For each repetition pattern a^x b^y c^z, etc., there will be a total of x*y*z combinations. How often will this occur? +# Probably not often. However, this will lead to issues in situations of...... 
# Alright, let's consider the possibility of
def find_shared_sequences(input1: list, input2: list):
    """Greedily match elements of ``input1`` to elements of ``input2`` in order.

    For each element of ``input1`` in turn, scan ``input2`` forward from just
    after the previous match for the first equal element.  If one is found the
    index pair is recorded; otherwise the element is left unmatched and the
    scan position is not moved.

    Returns:
        ``(indices_in_input1, indices_in_input2)``, two equally long ascending
        lists whose i-th entries point at equal elements.  The match is
        greedy, so it is not guaranteed to be the longest common subsequence.
    """
    shared_doc1_indices = []
    shared_doc2_indices = []
    right_pointer = 0
    for left_pointer, wanted in enumerate(input1):
        match_at = right_pointer
        while match_at < len(input2) and not (wanted == input2[match_at]):
            match_at += 1
        if match_at < len(input2):
            shared_doc1_indices.append(left_pointer)
            shared_doc2_indices.append(match_at)
            right_pointer = match_at + 1
    return (shared_doc1_indices, shared_doc2_indices)


def diff_dicts(
    doc1: dict,
    doc2: dict,
    prefix="",
    default_list_handler: Literal["ordered", "unordered"] = "ordered",
    overrides: dict[str, Literal['ordered', 'unordered']] | None = None,
) -> "Patch":
    """Build the patch that turns ``doc1`` into ``doc2``.

    Args:
        doc1: The original document.
        doc2: The target document.
        prefix: ``/``-separated path of ``doc1`` inside the top-level
            document; empty at the root.
        default_list_handler: How to diff list values that have no override.
        overrides: Per-key list handling, looked up by the bare key name.

    Returns:
        A ``Patch`` holding changes to shared keys (in ``doc1`` order), then
        deletions of keys only in ``doc1``, then additions of keys only in
        ``doc2``.  Nested dicts are diffed recursively and lists are handed to
        the ordered or unordered list differ.  Any other changed value becomes
        a ``Delete`` followed by an ``Add`` at the same path.

    NOTE(review): keys are joined with ``/``, so a key that itself contains
    ``/`` yields an ambiguous path.
    """
    if not overrides:
        overrides = {}

    def path_to(key) -> str:
        return f"{prefix}/{key}" if prefix else f"{key}"

    change_list = Patch()
    # Walk the dicts themselves rather than sets of keys so the order of the
    # resulting patch is deterministic.
    for key in doc1:
        if key not in doc2 or not (doc1[key] != doc2[key]):
            continue
        old, new = doc1[key], doc2[key]
        same_type = type(old) is type(new)
        if same_type and type(old) is list:
            method = overrides.get(key, default_list_handler)
            differ = diff_unordered_list if method == "unordered" else diff_ordered_list
            # The list's own key must be part of the path handed to the differ.
            change_list.extend(differ(old, new, path=path_to(key)))
        elif same_type and type(old) is dict:
            change_list.extend(
                diff_dicts(
                    old,
                    new,
                    prefix=path_to(key),
                    default_list_handler=default_list_handler,
                    overrides=overrides,
                )
            )
        else:
            # Appended one at a time: Patch.extend is only guaranteed to
            # accept another Patch.
            change_list.append(Delete(path_to(key), "", old))
            change_list.append(Add(path_to(key), new))
    for key in doc1:
        if key not in doc2:
            # Record the removed value so the deletion can be inverted.
            change_list.append(Delete(path_to(key), "", doc1[key]))
    for key in doc2:
        if key not in doc1:
            change_list.append(Add(path_to(key), doc2[key]))
    return change_list


# [start here]
def test_diff_ordered_list():
    """Ordered list diffs carry the removed or added element and its index."""
    assert diff_ordered_list([1, 2, 3, 4], [1, 2, 4, 5]) == Patch(
        [Delete("2", "", 3), Add("3", 5)]
    ), "testing diff with a replacement of a value"
    assert diff_ordered_list([], [1, 2, 3, 4]) == Patch(
        [Add("0", 1), Add("1", 2), Add("2", 3), Add("3", 4)]
    ), "testing diff adding to an empty list"
    assert diff_ordered_list([1, 2, 3], []) == Patch(
        [Delete("0", "", 1), Delete("0", "", 2), Delete("0", "", 3)]
    ), "Testing deleting all elements from a list."
    assert diff_ordered_list([0, 1, 2], [2, 1, 0]) == Patch(
        [Delete("1", "", 1), Delete("1", "", 2), Add("0", 2), Add("1", 1)]
    ), "testing rearrangement of elements"
    assert diff_ordered_list([{"1": "2"}], [{"2": "1"}]) == Patch(
        [Delete("0", "", {'1': '2'}), Add("0", {'2': '1'})]
    ), "testing replacement with dict elements"


def test_apply_add():
    """Add.apply on dict keys, nested paths and list positions."""
    assert Add("foo", "bar").apply({}) == {
        "foo": "bar"
    }, "testing simple application of Add on an empty dict"
    assert Add("foo/foo", "bar").apply({}) == {
        "foo": {"foo": "bar"}
    }, "testing simple adding of a dict-based value to an empty dict."
    assert Add("foo/3", 1).apply({"foo": [5, 6, 2, 3, 4]}) == {
        "foo": [5, 6, 2, 1, 3, 4]
    }, "Testing inserting a value into a list."
    assert Add("foo/*", 1).apply({"foo": [5, 6, 2, 3, 4]}) == {
        "foo": [5, 6, 2, 3, 4, 1]
    }, "Testing appending a value to a list."
    assert Add("foo", [1, 2, 3, 4]).apply({}) == {
        "foo": [1, 2, 3, 4]
    }, "testing adding a new key with a list value."
    assert Add("foo/6", 6).apply({"foo": [5, 6, 6, 1]}) == {
        "foo": [5, 6, 6, 1, 6]
    }, "Testing inserting past the list length."
    assert Add("foo", "bar").apply({"foo": "not bar"}) == {
        "foo": "not bar"
    }, "testing adding to a dict where the value already exists."


def test_apply_delete():
    """Delete.apply on dict keys, nested paths and list positions."""
    assert Delete("foo").apply({"bar": "foo"}) == {
        "bar": "foo"
    }, "testing a failed attempt at removing a key"
    assert (
        Delete("foo").apply({"foo": "bar"}) == {}
    ), "testing removal of last entry in a dict."
    assert Delete("foo/3").apply({"foo": [1, 1, 2, 1, 4]}) == {
        "foo": [1, 1, 2, 4]
    }, "testing removal of an specific index in a list."
    assert Delete("foo/*").apply({"foo": [1, 2, 3, 4, 5]}) == {
        "foo": [1, 2, 3, 4]
    }, "testing removal of the last element in a list."
    assert Delete("foo/6").apply({"foo": [1, 2, 3]}) == {
        "foo": [1, 2]
    }, "testing removal of an index out of scope in the list."
    assert Delete("foo/bar").apply({"foo": {"bar": "foo", "foo": "bar"}}) == {
        "foo": {"foo": "bar"}
    }, "testing removal of item in a nested dict."
    assert (
        Delete("foo").apply({"foo": [1, 2, 3, 4]}) == {}
    ), "Testing full removal of an array."
    assert Delete("foo/3").apply({"foo": [1, 2, 3, {"bar": "foo"}]}) == {
        "foo": [1, 2, 3]
    }, "testing removal of a dict in an array"


def test_diff_adds():
    """diff_dicts emits one Add per new key or per new list element."""
    assert diff_dicts({}, {"foo": "bar"}) == Patch(
        [Add("foo", "bar")]
    ), "Error in adding a single value to an empty dict."
    assert diff_dicts({"foo": {}}, {"foo": {"foo": "bar"}}) == Patch(
        [Add("foo/foo", "bar")]
    ), "Error in adding a key to a nested dict"
    assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3, 4, 5, 6]}) == Patch(
        [Add("foo/4", 5), Add("foo/5", 6)]
    ), "Error in adding values to an array."
    assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 1, 1, 1, 2, 3, 4]}) == Patch(
        [Add("foo/1", 1), Add("foo/2", 1), Add("foo/3", 1)]
    ), "Error in tracking the number of duplicate values added."
+ + +def test_diff_deletes(): + assert diff_dicts({"foo": "bar"}, {}) == Patch( + [Delete("foo")] + ), "Error in deleting a key from a dictionary." + assert diff_dicts( + {"foo": {"foo": "bar", "bar": "foo"}}, {"foo": {"foo": "bar"}} + ) == Patch([Delete("foo/bar")]), "Error deleting a value in a nested dict" + assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3]}) == Patch( + [Delete("foo", [4])] + ), "error deleting a value in an array" + assert diff_dicts({"foo": {"foo": "bar"}, "bar": "foo"}, {"bar": "foo"}) == Patch( + [Delete("foo")] + ), "Error in deleting a dict." + assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 3]}) == Patch( + [Delete("foo", [2, 4])] + ), "error in deleting a non-final element in a list." From c774d0ed981dc351c6dd65f01f12c85b9aad1eeb Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Fri, 30 Aug 2024 19:37:59 -0400 Subject: [PATCH 2/9] Altered Change Class to provide list type, indexing information, and require Deletes to have the correct value. --- sandbox.py | 231 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 152 insertions(+), 79 deletions(-) diff --git a/sandbox.py b/sandbox.py index 5b18ff55112..5e90634fba9 100644 --- a/sandbox.py +++ b/sandbox.py @@ -23,14 +23,16 @@ # lists: ???? if all goes well, a list changing should never occur, it will only have elements of it being added or deleted. # dicts: revert all key:Values within the dict back to normal, if they are not touched by the later commit. # Replace: -# Do I want a recursive definition? Quite possibly. After all, if it's recursive, it will properly allow me to deal with all sorts of potential structures. Now, problem-- how do I handle this data? +# Do I want a recursive definition? Quite possibly. After all, if it's recursive, +# it will properly allow me to deal with all sorts of potential structures. Now, problem-- how do I handle this data? # How do I have the paths properly? # Thought -> Pass the appropriate prefix into the function. 
Then-- Okay, let's just Haskell it. # On internal dict: [f'{prefix}/{newKey}' for newKey in diffDict()] # Stages for recording changes: Simple keys/values, easyy. -# Let's see about lists now. We have two types, ordered and unordered. This will be tricky-ish. How will I accomplish this? Let's assume that people aren't (usually) manually created, +# Let's see about lists now. We have two types, ordered and unordered. +# This will be tricky-ish. How will I accomplish this? Let's assume that Patches aren't (usually) manually created, # And usually only function from diff_dicts, and the like. This way, we have an easy pipeline to go to work with, and can potentially ignore some inconveniences. # So let's see. Desired behavior when performing the Add operation: # - If adding a simple key value, check if the key already exists. If it doesn't exist, add it. @@ -40,7 +42,8 @@ # Yeah, the elegant way to do this is to remove Replace, and instead use a variety of insert keys, but is that invertible? Let's think about it this way. # - Is the insert method invertible? It certainly is, it is undone by pop. # Alright, I think I need to bite the bullet, and use the 'path' method. Okay. This way, each add is responsible for a single data point. -# This is a pitfall. Alright, I think I need to be careful, because the problem with this, is that 'adding to a dict is an intrinsically separate operation from adding to a list. +# This is a pitfall. Alright, I think I need to be careful, because the problem with this, +# is that 'adding to a dict is an intrinsically separate operation from adding to a list. # But both use the same way of indexing in. How can I avoid ambiguity, here? Do I need to avoid it? # Let's say that I'm adding to it with a key. # Alright, back to breaking down the types of adding. 
@@ -49,18 +52,41 @@ # - Adding an element to a list (ordered) from copy import deepcopy from dataclasses import dataclass -from types import MappingProxyType +from contextlib import suppress from typing import Literal +# When should an Add be inverted? And what does it mean, what's the desired behavior? +# The desired behavior is split into three sections. +# -normal data type: if the value still matches the value from the add, then it should be deleted. If it was a complex data type, then ... +# all keys matching it should be changed? No, that doesn't sound right. Let's consider the scenario where I've added in an author name. +# deleting this is relatively trivial. If the author name is still noted at that address, then, remove it. +# Alright, I think this might benefit from splitting it up into two +# different typese for adds/deletes respectively. Also, worth noting-- this is going to +# find the fastest solution regardless, so there will not be unnecessary in betweens-- and even if they are, they'll be governed by order +# Information needed to see if an Add should be undone (because it's a function on this add, all of this needs to be available to deletes. ): +# - The path of the value to check. +# - Is the value in a list? (This is checkable in function.) +# - If so, if it's unordered, then remove the first +# instance of it in the list. Do nothing if the element is not in the list. +# - If the list is ordered, then ????? [[return later]] (This will +# need to be aware of intermediary changes, so I think I'll have to actually +# generate and compare/add the diffs together of the next function.) +# - If the value's not in a list, then just check if it matches the old value. if it does, then delete it. 
+# @dataclass class Change: def __init__( - self, path: str, value: str | list | int = "", old_value: str | list | int = "" + self, + path: str, + value: str | list | int = "", + list_type: Literal["ol", "ul", None] = None, + index: None | int = None, ): self.path = path - self.old_value = old_value + self.index = index self.value = value + self.list_type = list_type def apply(self, doc: dict): pass @@ -72,36 +98,43 @@ def invert(self): # Inverting an Add means that it deletes. class Add(Change): def __init__( - self, path: str, value: str | list | int = "", old_value: str | list | int = "" + self, + path: str, + value: str | list | int = "", + list_type: Literal["ol", "ul", None] = None, + index: None | int = None, ): - super().__init__(path, value, old_value) + super().__init__(path, value, list_type, index) def apply(self, document: dict) -> dict: doc = deepcopy(document) entry = doc key_list = self.path.split("/") for key in key_list[:-1]: - try: - entry = entry[key] - except: - entry[key] = {} - entry = entry[key] + entry.setdefault(key, {}) + entry = entry[key] # If the value is a list, and there exists a key:value pair at the full end of the path, we want to instead # add the elements of the value into the list. 
- if isinstance(entry, list): - if key_list[-1] == "*" or int(key_list[-1]) > len(entry) - 1: - entry.append(self.value) + + add_point = entry.get(key_list[-1], None) + if isinstance(add_point, list): + if self.list_type == "ol" and self.index: + add_point.insert(self.index, self.value) + elif self.list_type == "ul" or ( + self.list_type == "ol" and self.index and self.index >= len(add_point) + ): + add_point.append(self.value) else: - entry.insert(int(key_list[-1]), self.value) + print("failed to add to list") else: entry.setdefault(key_list[-1], self.value) return doc def invert(self): - return Delete(self.path, "", old_value=self.value) + return Delete(self.path, self.value, self.list_type, self.index) def __repr__(self): - return f'Add({self.path}, {self.value}, {self.old_value})' + return f'Add({self.path}, {self.value}{", " + self.list_type if self.list_type else ""}){"[" + str(self.index) + "]" if self.index else ""}' # The Delete class does not need a value, normally. However, when deleting elements from a list, the value equals @@ -111,37 +144,49 @@ def __repr__(self): # Delete's old value is the value that was deleted. This is not necessary when it has a singular value, as in deleting an element from a list. class Delete(Change): def __init__( - self, path: str, value: str | list | int = "", old_value: str | list | int = "" + self, + path: str, + value: str | list | int = "", + list_type: Literal["ol", "ul", None] = None, + index: None | int = None, ): - super().__init__(path, value, old_value) + super().__init__(path, value, list_type, index) def apply(self, document: dict | list): doc = deepcopy(document) entry = doc key_list = self.path.split("/") for key in key_list[:-1]: - try: - entry = entry[key] - except: + entry = entry.get(key, None) + if not entry: return doc + # If the value is a list, then we want to delete based off of the index provided at the end of the path. 
- if isinstance(entry, list): - if key_list[-1] == "*" or int(key_list[-1]) > len(entry) - 1: - entry.pop() - else: - entry.pop(int(key_list[-1])) + + add_point = entry.get(key_list[-1], None) + if add_point is None: + return doc + if add_point == self.value: + entry.pop(key_list[-1]) else: - try: - entry.pop(key_list[-1]) - except: - pass + + if isinstance(add_point, list): + if ( + self.list_type == "ol" + and self.index + and add_point[self.index] == self.value + ): + add_point.pop(self.index) + elif self.list_type == "ul": + with suppress(ValueError): + add_point.remove(self.value) return doc def invert(self): - return Add(self.path, self.value) + return Add(self.path, self.value, self.list_type, self.index) def __repr__(self): - return f'Delete({self.path}, {self.value}, {self.old_value})' + return f'Delete({self.path}, {self.value}{", " + self.list_type if self.list_type else ""}){"[" + str(self.index) + "]" if self.index else ""}' @dataclass @@ -155,12 +200,15 @@ def append(self, change: Change): # Python won't let me typehint this as a 'Patch' object. def extend(self, changes): - self.change_list.extend(changes.change_list) + if type(changes) is list: + self.change_list.extend(changes) + else: + self.change_list.extend(changes.change_list) def apply(self, doc): - changed_doc = doc + changed_doc = deepcopy(doc) for change in self.change_list: - change.apply(changed_doc) + changed_doc = change.apply(changed_doc) return changed_doc # out of place operation, returns a new Patch object. 
@@ -199,10 +247,12 @@ def diff_unordered_list(list1: list, list2: list, path: str = "") -> Patch: for key, value in item_id_to_count.items(): if value > 0: for i in range(value): - change_list.append(Add(f'{path}/*', key)) + change_list.append(Add(f'{path}', item_id_to_item[key], list_type="ul")) elif value < 0: for i in range(abs(value)): - change_list.append(Delete(f'{path}/*', value="", old_value=key)) + change_list.append( + Delete(f'{path}', item_id_to_item[key], list_type="ul") + ) return change_list @@ -215,15 +265,14 @@ def diff_ordered_list(input1: list, input2: list, path: str = "") -> Patch: for i in range(len(deleted_indices)): changelist.append( Delete( - f'{path + "/" if path else ""}{deleted_indices[i] -i}', - "", - old_value=input1[deleted_indices[i]], + path, + value=input1[deleted_indices[i]], + list_type="ol", + index=deleted_indices[i] - i, ) ) for addition in added_indices: - changelist.append( - Add(f'{path + "/" if path else ""}{addition}', input2[addition]) - ) + changelist.append(Add(path, input2[addition], list_type="ol", index=addition)) return changelist @@ -286,7 +335,7 @@ def diff_dicts( if type(doc1[key]) is not type(doc2[key]): change_list.extend( [ - Delete(f'{prefix + "/" if prefix else ""}{key}', "", doc1[key]), + Delete(f'{prefix + "/" if prefix else ""}{key}', doc1[key]), Add(f'{prefix + "/" if prefix else ""}{key}', doc2[key]), ] ) @@ -297,7 +346,7 @@ def diff_dicts( diff_unordered_list( doc1[key], doc2[key], - path=f'{prefix + "/" if prefix else ""}', + path=f'{prefix + "/" if prefix else ""}{key}', ) ) else: @@ -305,7 +354,7 @@ def diff_dicts( diff_ordered_list( doc1[key], doc2[key], - path=f'{prefix + "/" if prefix else ""}', + path=f'{prefix + "/" if prefix else ""}{key}', ) ) @@ -322,12 +371,12 @@ def diff_dicts( else: change_list.extend( [ - Delete(f'{prefix + "/" if prefix else ""}{key}', "", doc1[key]), + Delete(f'{prefix + "/" if prefix else ""}{key}', doc1[key]), Add(f'{prefix + "/" if prefix else ""}{key}', 
doc2[key]), ] ) for key in deleted: - change_list.append(Delete(f'{prefix + "/" if prefix else ""}{key}')) + change_list.append(Delete(f'{prefix + "/" if prefix else ""}{key}', doc1[key])) for key in added: change_list.append(Add(f'{prefix + "/" if prefix else ""}{key}', doc2[key])) return change_list @@ -337,19 +386,29 @@ def diff_dicts( def test_diff_ordered_list(): assert diff_ordered_list([1, 2, 3, 4], [1, 2, 4, 5]) == Patch( - [Delete("2", "", "3"), Add("3", "5")] + [Delete("", 3, "ol", 2), Add("", 5, "ol", 3)] ), "testing diff with a replacement of a value" assert diff_ordered_list([], [1, 2, 3, 4]) == Patch( - [Add("0", 1), Add("1", 2), Add("2", 3), Add("3", 4)] + [ + Add("", 1, "ol", 0), + Add("", 2, "ol", 1), + Add("", 3, "ol", 2), + Add("", 4, "ol", 3), + ] ), "testing diff adding to an empty dict" assert diff_ordered_list([1, 2, 3], []) == Patch( - [Delete("0", "", "1"), Delete("0", "", "2"), Delete("0", "", "3")] + [Delete("", 1, "ol", 0), Delete("", 2, "ol", 0), Delete("", 3, "ol", 0)] ), "Testing deleting all elements from a list." assert diff_ordered_list([0, 1, 2], [2, 1, 0]) == Patch( - [Delete("1", "", 1), Delete("1", "", "2"), Add("0", 2), Add("1", 1)] + [ + Delete("", 1, "ol", 1), + Delete("", 2, "ol", 1), + Add("", 2, "ol", 0), + Add("", 1, "ol", 1), + ] ), "testing rearrangement of elements" assert diff_ordered_list([{"1": "2"}], [{"2": "1"}]) == Patch( - [Delete("0", "", {'1': '2'}), Add("0", {'2': '1'})] + [Delete("", {"1": "2"}, "ol", 0), Add("", {"2": "1"}, "ol", 0)] ), "testing replacement with dict elements" @@ -361,16 +420,16 @@ def test_apply_add(): assert Add("foo/foo", "bar").apply({}) == { "foo": {"foo": "bar"} }, "testing simple adding of a dict-based value to an empty dict." - assert Add("foo/3", 1).apply({"foo": [5, 6, 2, 3, 4]}) == { + assert Add("foo", 1, "ol", 3).apply({"foo": [5, 6, 2, 3, 4]}) == { "foo": [5, 6, 2, 1, 3, 4] }, "Testing inserting a value into a list." 
- assert Add("foo/*", 1).apply({"foo": [5, 6, 2, 3, 4]}) == { + assert Add("foo", 1, "ul").apply({"foo": [5, 6, 2, 3, 4]}) == { "foo": [5, 6, 2, 3, 4, 1] }, "Testing appending a value to a list." assert Add("foo", [1, 2, 3, 4]).apply({}) == { "foo": [1, 2, 3, 4] }, "testing adding a new key with a list value." - assert Add("foo/6", 6).apply({"foo": [5, 6, 6, 1]}) == { + assert Add("foo", 6, "ol", 6).apply({"foo": [5, 6, 6, 1]}) == { "foo": [5, 6, 6, 1, 6] }, "Testing inserting past the list length." assert Add("foo", "bar").apply({"foo": "not bar"}) == { @@ -380,30 +439,33 @@ def test_apply_add(): def test_apply_delete(): - assert Delete("foo").apply({"bar": "foo"}) == { + assert Delete("foo", "foo").apply({"bar": "foo"}) == { "bar": "foo" }, "testing a failed attempt at removing a key" + assert Delete("foo", "bar").apply({"foo": "foo"}) == { + "foo": "foo" + }, "testing a failed attempt at removing a key with the wrong value" assert ( - Delete("foo").apply({"foo": "bar"}) == {} + Delete("foo", "bar").apply({"foo": "bar"}) == {} ), "testing removal of last entry in a dict." - assert Delete("foo/3").apply({"foo": [1, 1, 2, 1, 4]}) == { + assert Delete("foo", 1, "ol", 3).apply({"foo": [1, 1, 2, 1, 4]}) == { "foo": [1, 1, 2, 4] }, "testing removal of an specific index in a list." - assert Delete("foo/*").apply({"foo": [1, 2, 3, 4, 5]}) == { + assert Delete("foo", 5, "ul").apply({"foo": [1, 2, 3, 4, 5]}) == { "foo": [1, 2, 3, 4] - }, "testing removal of the last element in a list." - assert Delete("foo/6").apply({"foo": [1, 2, 3]}) == { - "foo": [1, 2] - }, "testing removal of an index out of scope in the list." 
- assert Delete("foo/bar").apply({"foo": {"bar": "foo", "foo": "bar"}}) == { + }, "testing removal of an element by value in an unordered list" + assert Delete("foo", "8", "ul").apply({"foo": [1, 2, 3]}) == { + "foo": [1, 2, 3] + }, "testing removal of an element not within an unordered list" + assert Delete("foo/bar", "foo").apply({"foo": {"bar": "foo", "foo": "bar"}}) == { "foo": {"foo": "bar"} }, "testing removal of item in a nested dict." assert ( - Delete("foo").apply({"foo": [1, 2, 3, 4]}) == {} + Delete("foo", [1, 2, 3, 4]).apply({"foo": [1, 2, 3, 4]}) == {} ), "Testing full removal of an array." - assert Delete("foo/3").apply({"foo": [1, 2, 3, {"bar": "foo"}]}) == { - "foo": [1, 2, 3] - }, "testing removal of a dict in an array" + assert Delete("foo", {"bar": "foo"}, "ul").apply( + {"foo": [1, 2, 3, {"bar": "foo"}]} + ) == {"foo": [1, 2, 3]}, "testing removal of a dict in an array" def test_diff_adds(): @@ -414,26 +476,37 @@ def test_diff_adds(): [Add("foo/foo", "bar")] ), "Error in adding a key to a nested dict" assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3, 4, 5, 6]}) == Patch( - [Add("foo", [5, 6])] + [Add("foo", 5, "ul"), Add("foo", 6, "ul")] ), "Error in adding values to an array." assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 1, 1, 1, 2, 3, 4]}) == Patch( - [Add("foo", [1, 1, 1])] + [Add("foo", 1, "ol", 1), Add("foo", 1, "ol", 2), Add("foo", 1, "ol", 3)] ), "Error in tracking the number of duplicate values added." def test_diff_deletes(): assert diff_dicts({"foo": "bar"}, {}) == Patch( - [Delete("foo")] + [Delete("foo", "bar")] ), "Error in deleting a key from a dictionary." 
assert diff_dicts( {"foo": {"foo": "bar", "bar": "foo"}}, {"foo": {"foo": "bar"}} - ) == Patch([Delete("foo/bar")]), "Error deleting a value in a nested dict" + ) == Patch([Delete("foo/bar", "foo")]), "Error deleting a value in a nested dict" assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3]}) == Patch( - [Delete("foo", [4])] + [Delete("foo", 4, "ul")] ), "error deleting a value in an array" assert diff_dicts({"foo": {"foo": "bar"}, "bar": "foo"}, {"bar": "foo"}) == Patch( - [Delete("foo")] + [Delete("foo", {"foo": "bar"})] ), "Error in deleting a dict." assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 3]}) == Patch( - [Delete("foo", [2, 4])] + [Delete("foo", "2", "ul"), Delete("foo", "4", "ul")] ), "error in deleting a non-final element in a list." + + +def test_inverts(): + + def invert_test(doc1: dict, doc2: dict): + diff = diff_dicts(doc1, doc2) + return diff.invert().apply(doc2) == doc1 + + assert diff_dicts({"foo": "bar"}, {"foo": "foo"}).invert().apply( + {"foo": "foo"} + ) == {"foo": "bar"}, "Simple invert test." From b36941970601be755ae3eeb6c8422ea02475ac22 Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Fri, 30 Aug 2024 19:46:14 -0400 Subject: [PATCH 3/9] Added option to enable overwrite when applying Add/Delete/Patch objects. 
--- sandbox.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/sandbox.py b/sandbox.py index 5e90634fba9..582737dd7a3 100644 --- a/sandbox.py +++ b/sandbox.py @@ -88,7 +88,7 @@ def __init__( self.value = value self.list_type = list_type - def apply(self, doc: dict): + def apply(self, doc: dict, overwrite: bool = False): pass def invert(self): @@ -106,7 +106,7 @@ def __init__( ): super().__init__(path, value, list_type, index) - def apply(self, document: dict) -> dict: + def apply(self, document: dict, overwrite: bool = False) -> dict: doc = deepcopy(document) entry = doc key_list = self.path.split("/") @@ -127,7 +127,11 @@ def apply(self, document: dict) -> dict: else: print("failed to add to list") else: + if overwrite: + entry[key_list[-1]] = self.value + entry.setdefault(key_list[-1], self.value) + return doc def invert(self): @@ -152,7 +156,7 @@ def __init__( ): super().__init__(path, value, list_type, index) - def apply(self, document: dict | list): + def apply(self, document: dict, overwrite: bool = False): doc = deepcopy(document) entry = doc key_list = self.path.split("/") @@ -166,15 +170,15 @@ def apply(self, document: dict | list): add_point = entry.get(key_list[-1], None) if add_point is None: return doc - if add_point == self.value: + if overwrite or add_point == self.value: entry.pop(key_list[-1]) else: if isinstance(add_point, list): if ( - self.list_type == "ol" - and self.index - and add_point[self.index] == self.value + self.index + and self.list_type == "ol" + and (overwrite or add_point[self.index] == self.value) ): add_point.pop(self.index) elif self.list_type == "ul": @@ -205,10 +209,10 @@ def extend(self, changes): else: self.change_list.extend(changes.change_list) - def apply(self, doc): + def apply(self, doc: dict, overwrite: bool = False): changed_doc = deepcopy(doc) for change in self.change_list: - changed_doc = change.apply(changed_doc) + changed_doc = change.apply(changed_doc, overwrite) return 
changed_doc # out of place operation, returns a new Patch object. From 6674f933820bd21bfeb2a205d00f10aad1bc1866 Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Mon, 2 Sep 2024 08:28:00 -0400 Subject: [PATCH 4/9] Added more notes, refactored diffing unordered lists to be cleaner/more efficient. --- sandbox.py | 106 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 20 deletions(-) diff --git a/sandbox.py b/sandbox.py index 582737dd7a3..90b263e6ffe 100644 --- a/sandbox.py +++ b/sandbox.py @@ -74,6 +74,34 @@ # generate and compare/add the diffs together of the next function.) # - If the value's not in a list, then just check if it matches the old value. if it does, then delete it. # + + +class HashableDict(dict): + def __hash__(self): + return hash(tuple(sorted(self.items()))) + + +def convert_to_hashable( + doc: dict[any, any] | list[any] +) -> HashableDict[any, any] | tuple[any]: + if type(doc) is doc: + hashable_dict = HashableDict() + for key in doc.keys(): + if type(doc[key]) is dict: + hashable_dict[key] = convert_to_hashable(dict[key]) + elif type(doc[key]) is list: + hashable_dict[key] = tuple( + [convert_to_hashable(elem) for elem in doc[key]] + ) + else: + hashable_dict[key] = doc[key] + return hashable_dict + elif type(doc) is list: + return tuple([convert_to_hashable(elem) for elem in doc]) + else: + return doc + + @dataclass class Change: def __init__( @@ -231,31 +259,31 @@ def find_key_by_value(find_value, index: dict): def diff_unordered_list(list1: list, list2: list, path: str = "") -> Patch: - item_id_to_count: dict = {} - item_id_to_item: dict = {} - item_id: int = 0 + item_to_count: dict = {} + hash_to_original: dict = {} change_list = Patch([]) for elem in list2: - if elem not in item_id_to_item.values(): - item_id_to_item[item_id] = elem - item_id += 1 - if item_id := find_key_by_value(elem, item_id_to_item): - item_id_to_count[item_id] = item_id_to_count.get(item_id, 0) + 1 + elem_hash = 
convert_to_hashable(elem) + hash_to_original.set_default(elem_hash, elem) + hash_to_original[elem] + item_to_count.setdefault(elem_hash, 0) + item_to_count[elem_hash] += 1 for elem in list1: - if elem not in item_id_to_item.values(): - item_id_to_item[item_id] = elem - item_id += 1 - if item_id := find_key_by_value(elem, item_id_to_item): - item_id_to_count[item_id] = item_id_to_count.get(item_id, 0) - 1 + elem_hash = convert_to_hashable(elem) + hash_to_original.set_default(elem_hash, elem) + item_to_count.setdefault(elem_hash, 0) + item_to_count[elem_hash] -= 1 - for key, value in item_id_to_count.items(): + for key, value in item_to_count.items(): if value > 0: for i in range(value): - change_list.append(Add(f'{path}', item_id_to_item[key], list_type="ul")) + change_list.append( + Add(f'{path}', hash_to_original[key], list_type="ul") + ) elif value < 0: for i in range(abs(value)): change_list.append( - Delete(f'{path}', item_id_to_item[key], list_type="ul") + Delete(f'{path}', hash_to_original[key], list_type="ul") ) return change_list @@ -288,15 +316,36 @@ def diff_ordered_list(input1: list, input2: list, path: str = "") -> Patch: # So my current issue has to do with figuring out a way to efficiently diff sections of a list. # step 1, match if the same, adding the index object to the shared_values array. # step 2, if not the same, increment the right until they are the same, and continue. If right goes over the -# step 3: To handle duplicates; do I need to run this multiple times, for each possible configuration? That feels like the best way, but also like it could get -# expensive to manage. +# step 3: To handle duplicates; do I need to run this multiple times, for each possible configuration? That feels like +# the best way, but also like it could get expensive to manage. # Alright, let's think of it this way. There's a, per repetition of a^x b^y c^z, etc, it'll be a total of x*y*z combinations. How often will this occur? # Probably not often. 
However, this will lead to issues in situations of...... Alright, let's consider the possibility of + + +# [TODO: Implement that idea of running this function once for each permutation of duplicates being what it skips to. Then, after doing this, take the shortest +# Patch path. Additional note: Consider improving performance of this by, rather than having to iterate through the list to find the next occcurence, save the +# ids of the values in a way thtat will let you automatically O(1) access the next one. ] def find_shared_sequences(input1: list, input2: list): right_pointer = 0 left_pointer = 0 - shared_doc1_indices = [] - shared_doc2_indices = [] + shared_doc1_indices: list[int] = [] + shared_doc2_indices: list[int] = [] + # Alright, I now have the ability to check indices for specific elements in O(1) time. That's helpful. + # Now what I need is to generate different ways of pairing up the lists, so that's going to be... + # Lists of lists? Yeah, because each time I do, I copy and extend with the new value? + # And then I can quickly check each index to see if they're in order. If not, it can get removed. + # How to calculate different pairs... It's easy if the element appears once in either list. [1], [1,2,3] becomes [(1,1), (1,2), (1,3)] + # Now, it becomes slightly harder if both pairs have two or more occurrences. [1,2], [1,2,3] becomes [[(1,1), (2,2)], + # [(1,1), (2,3)], [(1,2), (2,3)], [(2,1)], [(1,3)]] + # Invalid are [(1,2), (2,1)]... No, I don't need full iteration, because I already know I want to match where possible. So, the 3 options become: + # [(1,1),(2,2)], [(1,1), (2,3)], [(1,2), (2,3)], which is...choose 2 from 3, alright. This makes sense, combinations don't care about order. + # Now, how to generate this lists of tuples in terms of a function.... + # The problem is that I cannot do in blindly based on the matching indexes for a singular element. 
I will need partial matches, because the ways in which other sequences + # work will be interfered with by this proccess. + + # Belay this: I don't yet know if it's necessary, let's wait on that. + + # Building this index takes care of counting duplicates, which is needed in order to properly get the most efficient version of differences. while left_pointer < len(input1): if ( right_pointer < len(input2) @@ -318,6 +367,15 @@ def find_shared_sequences(input1: list, input2: list): return (shared_doc1_indices, shared_doc2_indices) +def generate_possible_index_matches( + index1: list[int], index2: list[int] +) -> list[list[tuple[int, int]]]: + if len(index1) == len(index2): + return list(index1, index2) + min_list = deepcopy(index1) if len(index1) <= len(index2) else deepcopy(index2) + max_list = index2 if min_list == index1 else index1 + + def diff_dicts( doc1: dict, doc2: dict, @@ -387,6 +445,14 @@ def diff_dicts( # [start here] + + +# Permutations to test: +# 1: Insertion, Deletion +# 2: As normal, into an ordered list, unordered list. 
+# 3: Forced, Non-Forced +# 4: Successful, Failing +# 5: iinversion def test_diff_ordered_list(): assert diff_ordered_list([1, 2, 3, 4], [1, 2, 4, 5]) == Patch( From f43cb726ef322d07184cd707fcfbf5e3563397bb Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Thu, 5 Sep 2024 13:50:49 -0400 Subject: [PATCH 5/9] misc edits --- sandbox.py | 349 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 195 insertions(+), 154 deletions(-) diff --git a/sandbox.py b/sandbox.py index 90b263e6ffe..49f083fb9de 100644 --- a/sandbox.py +++ b/sandbox.py @@ -51,7 +51,7 @@ # - Adding an element to a list (orderless) # - Adding an element to a list (ordered) from copy import deepcopy -from dataclasses import dataclass +from dataclasses import dataclass, field from contextlib import suppress from typing import Literal @@ -84,17 +84,10 @@ def __hash__(self): def convert_to_hashable( doc: dict[any, any] | list[any] ) -> HashableDict[any, any] | tuple[any]: - if type(doc) is doc: + if type(doc) is dict: hashable_dict = HashableDict() for key in doc.keys(): - if type(doc[key]) is dict: - hashable_dict[key] = convert_to_hashable(dict[key]) - elif type(doc[key]) is list: - hashable_dict[key] = tuple( - [convert_to_hashable(elem) for elem in doc[key]] - ) - else: - hashable_dict[key] = doc[key] + hashable_dict[key] = convert_to_hashable(dict[key]) return hashable_dict elif type(doc) is list: return tuple([convert_to_hashable(elem) for elem in doc]) @@ -104,70 +97,105 @@ def convert_to_hashable( @dataclass class Change: - def __init__( + + path: list[str | int] + value: str | list | int = "" + list_type: Literal["ol", "ul", None] = None + index: None | int = None + + def apply( self, - path: str, - value: str | list | int = "", - list_type: Literal["ol", "ul", None] = None, - index: None | int = None, + doc: dict, + conflict_options: Literal["error", "keep-old", "overwrite"] = "keep-old", ): - self.path = path - self.index = index - self.value = value - self.list_type = list_type - 
- def apply(self, doc: dict, overwrite: bool = False): pass - def invert(self): + def invert(self) -> 'Change': pass +def traverse_nested_dicts_and_lists( + path: list, document: dict, create_missing_path: bool +): + + for key in path[:-1]: + if type(entry) is list: + try: + key = int(key) + entry = entry[key] + except: + raise ValueError("List indices must be integers") + elif type(entry) is dict: + if create_missing_path: + entry.setdefault(key, {}) + entry = entry[key] + else: + raise ValueError( + "Error: Cannot index into a type that is not Dict or List." + ) + return (entry, path[-1]) + + # Inverting an Add means that it deletes. +# Consider if In-Place/Out-Of-Place should be an input option. class Add(Change): - def __init__( - self, - path: str, - value: str | list | int = "", - list_type: Literal["ol", "ul", None] = None, - index: None | int = None, - ): - super().__init__(path, value, list_type, index) - def apply(self, document: dict, overwrite: bool = False) -> dict: - doc = deepcopy(document) + # TODO: Create a tripartite set of options to replace overwrite. Error with conflict, keep old, or overwrite. + def apply( + self, + doc: dict, + conflict_options: Literal["error", "keep-old", "overwrite"] = "keep-old", + ) -> dict: entry = doc - key_list = self.path.split("/") - for key in key_list[:-1]: - entry.setdefault(key, {}) - entry = entry[key] + for key in self.path[:-1]: + if type(entry) is list: + try: + key = int(key) + entry = entry[key] + except: + raise ValueError("List indices must be integers") + elif type(entry) is dict: + entry.setdefault(key, {}) + entry = entry[key] + else: + raise ValueError( + "Cannot index into a value other than a dictionary or list." + ) # If the value is a list, and there exists a key:value pair at the full end of the path, we want to instead # add the elements of the value into the list. 
- add_point = entry.get(key_list[-1], None) + add_point = entry.get(self.path[-1], None) if isinstance(add_point, list): - if self.list_type == "ol" and self.index: - add_point.insert(self.index, self.value) - elif self.list_type == "ul" or ( - self.list_type == "ol" and self.index and self.index >= len(add_point) + if self.list_type == "ul" or ( + self.list_type == "ol" + and self.index is not None + and self.index >= len(add_point) ): add_point.append(self.value) + elif self.list_type == "ol" and self.index is not None: + add_point.insert(self.index, self.value) else: - print("failed to add to list") + if self.list_type is not None and self.index is None: + raise ValueError( + f'No index exists for list type: [{self.list_type}]' + ) + else: + raise ValueError(f'Invalid List Type') else: - if overwrite: - entry[key_list[-1]] = self.value + if conflict_options == "overwrite": + entry[self.path[-1]] = self.value + elif conflict_options == "error": + raise ValueError( + f'Error in adding to key at path {self.path}, as a value already exists at that key.' + ) - entry.setdefault(key_list[-1], self.value) + entry.setdefault(self.path[-1], self.value) return doc - def invert(self): + def invert(self) -> Change: return Delete(self.path, self.value, self.list_type, self.index) - def __repr__(self): - return f'Add({self.path}, {self.value}{", " + self.list_type if self.list_type else ""}){"[" + str(self.index) + "]" if self.index else ""}' - # The Delete class does not need a value, normally. However, when deleting elements from a list, the value equals # the value of the element to be deleted. @@ -175,38 +203,47 @@ def __repr__(self): # Add's information: key, value, and path. The path for a list includes the index after the /. # Delete's old value is the value that was deleted. This is not necessary when it has a singular value, as in deleting an element from a list. 
class Delete(Change): - def __init__( + + def apply( self, - path: str, - value: str | list | int = "", - list_type: Literal["ol", "ul", None] = None, - index: None | int = None, + doc: dict, + conflict_options: Literal["error", "keep-old", "overwrite"] = "keep-old", ): - super().__init__(path, value, list_type, index) - - def apply(self, document: dict, overwrite: bool = False): - doc = deepcopy(document) entry = doc - key_list = self.path.split("/") - for key in key_list[:-1]: - entry = entry.get(key, None) + for key in self.path[:-1]: + if type(entry) is list: + try: + key = int(key) + entry = entry[key] + except: + return doc + elif type(entry) is dict: + entry = entry.get(key, None) if not entry: return doc - # If the value is a list, then we want to delete based off of the index provided at the end of the path. - - add_point = entry.get(key_list[-1], None) + # If the value is a list, then we want to delete based off of the index. + if type(entry) is dict: + add_point = entry.get(self.path[-1], None) + elif type(entry) is list: + try: + key = int(self.path[-1]) + add_point = entry[key] + except: + raise ValueError("List indexes must be integer values.") if add_point is None: return doc - if overwrite or add_point == self.value: - entry.pop(key_list[-1]) + if conflict_options == "overwrite" or add_point == self.value: + entry.pop(self.path[-1]) else: - if isinstance(add_point, list): if ( - self.index + self.index is not None and self.list_type == "ol" - and (overwrite or add_point[self.index] == self.value) + and ( + conflict_options == "overwrite" + or add_point[self.index] == self.value + ) ): add_point.pop(self.index) elif self.list_type == "ul": @@ -217,79 +254,68 @@ def apply(self, document: dict, overwrite: bool = False): def invert(self): return Add(self.path, self.value, self.list_type, self.index) - def __repr__(self): - return f'Delete({self.path}, {self.value}{", " + self.list_type if self.list_type else ""}){"[" + str(self.index) + "]" if 
self.index else ""}' - @dataclass class Patch: - def __init__(self, change_list: list[Change] | None = None): - self.change_list = change_list if change_list else [] + change_list: list[Change] = field(default_factory=list) + + # DONE THIS: Consider implementing Dataclass properly; look into how their constructors are defined. + def __post_init__(self): + self.change_list = [] if type(self.change_list) is None else self.change_list def append(self, change: Change): self.change_list.append(change) - # Python won't let me typehint this as a 'Patch' object. - def extend(self, changes): + def extend(self, changes: 'Patch| list[Change]') -> None: if type(changes) is list: self.change_list.extend(changes) else: self.change_list.extend(changes.change_list) - def apply(self, doc: dict, overwrite: bool = False): + # DONE THIS: Remove DeepCopy from Adds/Deletes, and only run via Patch. + def apply(self, doc: dict, conflict_options="keep-old") -> dict: changed_doc = deepcopy(doc) for change in self.change_list: - changed_doc = change.apply(changed_doc, overwrite) + changed_doc = change.apply(changed_doc, conflict_options) return changed_doc # out of place operation, returns a new Patch object. 
- def invert(self): + def invert(self) -> 'Patch': return Patch([change.invert() for change in reversed(self.change_list)]) def __repr__(self): return f'Patch({self.change_list!s})' -def find_key_by_value(find_value, index: dict): - for key, value in index.items(): - if value == find_value: - return key - return None - - def diff_unordered_list(list1: list, list2: list, path: str = "") -> Patch: item_to_count: dict = {} hash_to_original: dict = {} change_list = Patch([]) for elem in list2: elem_hash = convert_to_hashable(elem) - hash_to_original.set_default(elem_hash, elem) + hash_to_original.setdefault(elem_hash, elem) hash_to_original[elem] item_to_count.setdefault(elem_hash, 0) item_to_count[elem_hash] += 1 for elem in list1: elem_hash = convert_to_hashable(elem) - hash_to_original.set_default(elem_hash, elem) + hash_to_original.setdefault(elem_hash, elem) item_to_count.setdefault(elem_hash, 0) item_to_count[elem_hash] -= 1 for key, value in item_to_count.items(): if value > 0: for i in range(value): - change_list.append( - Add(f'{path}', hash_to_original[key], list_type="ul") - ) + change_list.append(Add(path, hash_to_original[key], list_type="ul")) elif value < 0: for i in range(abs(value)): - change_list.append( - Delete(f'{path}', hash_to_original[key], list_type="ul") - ) + change_list.append(Delete(path, hash_to_original[key], list_type="ul")) return change_list # What do I prioritize, in -def diff_ordered_list(input1: list, input2: list, path: str = "") -> Patch: +def diff_ordered_list(input1: list, input2: list, path: list[str | int] = []) -> Patch: changelist = Patch([]) shared_values = find_shared_sequences(input1, input2) deleted_indices = [x for x in range(len(input1)) if x not in shared_values[0]] @@ -367,20 +393,11 @@ def find_shared_sequences(input1: list, input2: list): return (shared_doc1_indices, shared_doc2_indices) -def generate_possible_index_matches( - index1: list[int], index2: list[int] -) -> list[list[tuple[int, int]]]: - if len(index1) 
== len(index2): - return list(index1, index2) - min_list = deepcopy(index1) if len(index1) <= len(index2) else deepcopy(index2) - max_list = index2 if min_list == index1 else index1 - - def diff_dicts( doc1: dict, doc2: dict, - prefix="", - default_list_handler: Literal["ordered", "unordered"] = "ordered", + path: list[str | int] = [], + default_list_handler: Literal["ordered", "unordered"] = "unordered", overrides: dict[str, Literal['ordered', 'unordered']] | None = None, ): if not overrides: @@ -397,8 +414,8 @@ def diff_dicts( if type(doc1[key]) is not type(doc2[key]): change_list.extend( [ - Delete(f'{prefix + "/" if prefix else ""}{key}', doc1[key]), - Add(f'{prefix + "/" if prefix else ""}{key}', doc2[key]), + Delete(path + [key], doc1[key]), + Add(path + [key], doc2[key]), ] ) elif type(doc1[key]) is list: @@ -408,7 +425,7 @@ def diff_dicts( diff_unordered_list( doc1[key], doc2[key], - path=f'{prefix + "/" if prefix else ""}{key}', + path=path + [key], ) ) else: @@ -416,7 +433,7 @@ def diff_dicts( diff_ordered_list( doc1[key], doc2[key], - path=f'{prefix + "/" if prefix else ""}{key}', + path=path + [key], ) ) @@ -425,7 +442,7 @@ def diff_dicts( diff_dicts( doc1[key], doc2[key], - prefix=f'{prefix + "/" if prefix else ""}{key}', + path=path + [key], default_list_handler=default_list_handler, overrides=overrides, ) @@ -433,17 +450,21 @@ def diff_dicts( else: change_list.extend( [ - Delete(f'{prefix + "/" if prefix else ""}{key}', doc1[key]), - Add(f'{prefix + "/" if prefix else ""}{key}', doc2[key]), + Delete(path + [key], doc1[key]), + Add(path + [key], doc2[key]), ] ) for key in deleted: - change_list.append(Delete(f'{prefix + "/" if prefix else ""}{key}', doc1[key])) + change_list.append(Delete(path + [key], doc1[key])) for key in added: - change_list.append(Add(f'{prefix + "/" if prefix else ""}{key}', doc2[key])) + change_list.append(Add(path + [key], doc2[key])) return change_list +def undo_arbitrary_version_changes(): + pass + + # [start here] @@ 
-453,121 +474,141 @@ def diff_dicts( # 3: Forced, Non-Forced # 4: Successful, Failing # 5: iinversion + + +# TODO: Look into Pytest shorthand for applying tests to different values in an array, rather than manually entering each one. ([inputs], failure message) for example, as each cell. def test_diff_ordered_list(): assert diff_ordered_list([1, 2, 3, 4], [1, 2, 4, 5]) == Patch( - [Delete("", 3, "ol", 2), Add("", 5, "ol", 3)] + [Delete([], 3, "ol", 2), Add([], 5, "ol", 3)] ), "testing diff with a replacement of a value" assert diff_ordered_list([], [1, 2, 3, 4]) == Patch( [ - Add("", 1, "ol", 0), - Add("", 2, "ol", 1), - Add("", 3, "ol", 2), - Add("", 4, "ol", 3), + Add([], 1, "ol", 0), + Add([], 2, "ol", 1), + Add([], 3, "ol", 2), + Add([], 4, "ol", 3), ] ), "testing diff adding to an empty dict" assert diff_ordered_list([1, 2, 3], []) == Patch( - [Delete("", 1, "ol", 0), Delete("", 2, "ol", 0), Delete("", 3, "ol", 0)] + [Delete([], 1, "ol", 0), Delete([], 2, "ol", 0), Delete([], 3, "ol", 0)] ), "Testing deleting all elements from a list." assert diff_ordered_list([0, 1, 2], [2, 1, 0]) == Patch( [ - Delete("", 1, "ol", 1), - Delete("", 2, "ol", 1), - Add("", 2, "ol", 0), - Add("", 1, "ol", 1), + Delete([], 1, "ol", 1), + Delete([], 2, "ol", 1), + Add([], 2, "ol", 0), + Add([], 1, "ol", 1), ] ), "testing rearrangement of elements" assert diff_ordered_list([{"1": "2"}], [{"2": "1"}]) == Patch( - [Delete("", {"1": "2"}, "ol", 0), Add("", {"2": "1"}, "ol", 0)] + [Delete([], {"1": "2"}, "ol", 0), Add([], {"2": "1"}, "ol", 0)] ), "testing replacement with dict elements" def test_apply_add(): - assert Add("foo", "bar").apply({}) == { + assert Add(["foo"], "bar").apply({}) == { "foo": "bar" }, "testing simple application of Add on an empty dict" - assert Add("foo/foo", "bar").apply({}) == { + assert Add(["foo", "foo"], "bar").apply({}) == { "foo": {"foo": "bar"} }, "testing simple adding of a dict-based value to an empty dict." 
- assert Add("foo", 1, "ol", 3).apply({"foo": [5, 6, 2, 3, 4]}) == { + assert Add(["foo"], 1, "ol", 3).apply({"foo": [5, 6, 2, 3, 4]}) == { "foo": [5, 6, 2, 1, 3, 4] }, "Testing inserting a value into a list." - assert Add("foo", 1, "ul").apply({"foo": [5, 6, 2, 3, 4]}) == { + assert Add(["foo"], 1, "ul").apply({"foo": [5, 6, 2, 3, 4]}) == { "foo": [5, 6, 2, 3, 4, 1] }, "Testing appending a value to a list." - assert Add("foo", [1, 2, 3, 4]).apply({}) == { + assert Add(["foo"], [1, 2, 3, 4]).apply({}) == { "foo": [1, 2, 3, 4] }, "testing adding a new key with a list value." - assert Add("foo", 6, "ol", 6).apply({"foo": [5, 6, 6, 1]}) == { + assert Add(["foo"], 6, "ol", 6).apply({"foo": [5, 6, 6, 1]}) == { "foo": [5, 6, 6, 1, 6] }, "Testing inserting past the list length." - assert Add("foo", "bar").apply({"foo": "not bar"}) == { + assert Add(["foo"], "bar").apply({"foo": "not bar"}) == { "foo": "not bar" }, "testing adding to a dict where the value already exists." + assert Add(["foo"], "bar").apply( + {"foo": "not bar"}, conflict_options="overwrite" + ) == { + "foo": "bar" + }, "testing an overwrite add to a dict where the value already exists." + assert Add(["foo", 1, "foo"], "bar").apply({"foo": [0, {}]}) == { + "foo": [0, {"foo": "bar"}] + } def test_apply_delete(): - assert Delete("foo", "foo").apply({"bar": "foo"}) == { + assert Delete(["foo"], "foo").apply({"bar": "foo"}) == { "bar": "foo" }, "testing a failed attempt at removing a key" - assert Delete("foo", "bar").apply({"foo": "foo"}) == { + assert Delete(["foo"], "bar").apply({"foo": "foo"}) == { "foo": "foo" }, "testing a failed attempt at removing a key with the wrong value" assert ( - Delete("foo", "bar").apply({"foo": "bar"}) == {} + Delete(["foo"], "bar").apply({"foo": "bar"}) == {} ), "testing removal of last entry in a dict." 
- assert Delete("foo", 1, "ol", 3).apply({"foo": [1, 1, 2, 1, 4]}) == { + assert Delete(["foo"], 1, "ol", 3).apply({"foo": [1, 1, 2, 1, 4]}) == { "foo": [1, 1, 2, 4] }, "testing removal of an specific index in a list." - assert Delete("foo", 5, "ul").apply({"foo": [1, 2, 3, 4, 5]}) == { + assert Delete(["foo"], 5, "ul").apply({"foo": [1, 2, 3, 4, 5]}) == { "foo": [1, 2, 3, 4] }, "testing removal of an element by value in an unordered list" - assert Delete("foo", "8", "ul").apply({"foo": [1, 2, 3]}) == { + assert Delete(["foo"], "8", "ul").apply({"foo": [1, 2, 3]}) == { "foo": [1, 2, 3] }, "testing removal of an element not within an unordered list" - assert Delete("foo/bar", "foo").apply({"foo": {"bar": "foo", "foo": "bar"}}) == { - "foo": {"foo": "bar"} - }, "testing removal of item in a nested dict." + assert Delete(["foo", "bar"], "foo").apply( + {"foo": {"bar": "foo", "foo": "bar"}} + ) == {"foo": {"foo": "bar"}}, "testing removal of item in a nested dict." assert ( - Delete("foo", [1, 2, 3, 4]).apply({"foo": [1, 2, 3, 4]}) == {} + Delete(["foo"], [1, 2, 3, 4]).apply({"foo": [1, 2, 3, 4]}) == {} ), "Testing full removal of an array." - assert Delete("foo", {"bar": "foo"}, "ul").apply( + assert Delete(["foo"], {"bar": "foo"}, "ul").apply( {"foo": [1, 2, 3, {"bar": "foo"}]} ) == {"foo": [1, 2, 3]}, "testing removal of a dict in an array" + assert Delete(["foo", 1, "foo"], "bar").apply({"foo": [0, {"foo": "bar"}]}) == { + "foo": [0, {}] + } def test_diff_adds(): assert diff_dicts({}, {"foo": "bar"}) == Patch( - [Add("foo", "bar")] + [Add(["foo"], "bar")] ), "Error in adding a single value to an empty dict." 
assert diff_dicts({"foo": {}}, {"foo": {"foo": "bar"}}) == Patch( - [Add("foo/foo", "bar")] + [Add(["foo", "foo"], "bar")] ), "Error in adding a key to a nested dict" assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3, 4, 5, 6]}) == Patch( - [Add("foo", 5, "ul"), Add("foo", 6, "ul")] + [Add(["foo"], 5, "ul"), Add(["foo"], 6, "ul")] ), "Error in adding values to an array." - assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 1, 1, 1, 2, 3, 4]}) == Patch( - [Add("foo", 1, "ol", 1), Add("foo", 1, "ol", 2), Add("foo", 1, "ol", 3)] + assert diff_dicts( + {"foo": [1, 2, 3, 4]}, + {"foo": [1, 1, 1, 1, 2, 3, 4]}, + overrides={"foo": "ordered"}, + ) == Patch( + [Add(["foo"], 1, "ol", 1), Add(["foo"], 1, "ol", 2), Add(["foo"], 1, "ol", 3)] ), "Error in tracking the number of duplicate values added." def test_diff_deletes(): assert diff_dicts({"foo": "bar"}, {}) == Patch( - [Delete("foo", "bar")] + [Delete(["foo"], "bar")] ), "Error in deleting a key from a dictionary." assert diff_dicts( {"foo": {"foo": "bar", "bar": "foo"}}, {"foo": {"foo": "bar"}} - ) == Patch([Delete("foo/bar", "foo")]), "Error deleting a value in a nested dict" + ) == Patch( + [Delete(["foo", "bar"], "foo")] + ), "Error deleting a value in a nested dict" assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3]}) == Patch( - [Delete("foo", 4, "ul")] + [Delete(["foo"], 4, "ul")] ), "error deleting a value in an array" assert diff_dicts({"foo": {"foo": "bar"}, "bar": "foo"}, {"bar": "foo"}) == Patch( - [Delete("foo", {"foo": "bar"})] + [Delete(["foo"], {"foo": "bar"})] ), "Error in deleting a dict." assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 3]}) == Patch( - [Delete("foo", "2", "ul"), Delete("foo", "4", "ul")] + [Delete(["foo"], 2, "ul"), Delete(["foo"], 4, "ul")] ), "error in deleting a non-final element in a list." 
From ff153f6273ea469487887e9fa9b0acf179910112 Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Fri, 6 Sep 2024 09:21:19 -0400 Subject: [PATCH 6/9] Additional changes, in parametrizing most pytest tests. --- sandbox.py | 102 +++++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/sandbox.py b/sandbox.py index 49f083fb9de..a724d64da0d 100644 --- a/sandbox.py +++ b/sandbox.py @@ -54,7 +54,7 @@ from dataclasses import dataclass, field from contextlib import suppress from typing import Literal - +import pytest # When should an Add be inverted? And what does it mean, what's the desired behavior? # The desired behavior is split into three sections. @@ -461,8 +461,10 @@ def diff_dicts( return change_list -def undo_arbitrary_version_changes(): - pass +def undo_arbitrary_version_changes(prior_version:dict, version_to_undo:dict, latest_version:dict): + changeset_to_undo = diff_dicts(prior_version, version_to_undo).invert() + return changeset_to_undo.apply(latest_version) + # [start here] @@ -477,66 +479,52 @@ def undo_arbitrary_version_changes(): # TODO: Look into Pytest shorthand for applying tests to different values in an array, rather than manually entering each one. ([inputs], failure message) for example, as each cell. 
-def test_diff_ordered_list(): - assert diff_ordered_list([1, 2, 3, 4], [1, 2, 4, 5]) == Patch( - [Delete([], 3, "ol", 2), Add([], 5, "ol", 3)] - ), "testing diff with a replacement of a value" - assert diff_ordered_list([], [1, 2, 3, 4]) == Patch( + +DIFF_ORDERED_LISTS_TESTS = [ + ([1, 2, 3, 4], [1, 2, 4, 5], Patch([Delete([], 3, "ol", 2), Add([], 5, "ol", 3)]),"testing diff with a replacement of a value"), + ([], [1, 2, 3, 4], Patch( [ Add([], 1, "ol", 0), Add([], 2, "ol", 1), Add([], 3, "ol", 2), Add([], 4, "ol", 3), - ] - ), "testing diff adding to an empty dict" - assert diff_ordered_list([1, 2, 3], []) == Patch( - [Delete([], 1, "ol", 0), Delete([], 2, "ol", 0), Delete([], 3, "ol", 0)] - ), "Testing deleting all elements from a list." - assert diff_ordered_list([0, 1, 2], [2, 1, 0]) == Patch( + ]), "testing diff adding to an empty dict"), + ([1, 2, 3], [],Patch( + [Delete([], 1, "ol", 0), Delete([], 2, "ol", 0), Delete([], 3, "ol", 0)]), "Testing deleting all elements from a list."), + ([0, 1, 2], [2, 1, 0], Patch( [ Delete([], 1, "ol", 1), Delete([], 2, "ol", 1), Add([], 2, "ol", 0), Add([], 1, "ol", 1), ] - ), "testing rearrangement of elements" - assert diff_ordered_list([{"1": "2"}], [{"2": "1"}]) == Patch( + ), "testing rearrangement of elements" ), + ([{"1": "2"}], [{"2": "1"}], Patch( [Delete([], {"1": "2"}, "ol", 0), Add([], {"2": "1"}, "ol", 0)] - ), "testing replacement with dict elements" - - -def test_apply_add(): - - assert Add(["foo"], "bar").apply({}) == { - "foo": "bar" - }, "testing simple application of Add on an empty dict" - assert Add(["foo", "foo"], "bar").apply({}) == { - "foo": {"foo": "bar"} - }, "testing simple adding of a dict-based value to an empty dict." - assert Add(["foo"], 1, "ol", 3).apply({"foo": [5, 6, 2, 3, 4]}) == { - "foo": [5, 6, 2, 1, 3, 4] - }, "Testing inserting a value into a list." 
- assert Add(["foo"], 1, "ul").apply({"foo": [5, 6, 2, 3, 4]}) == { - "foo": [5, 6, 2, 3, 4, 1] - }, "Testing appending a value to a list." - assert Add(["foo"], [1, 2, 3, 4]).apply({}) == { - "foo": [1, 2, 3, 4] - }, "testing adding a new key with a list value." - assert Add(["foo"], 6, "ol", 6).apply({"foo": [5, 6, 6, 1]}) == { - "foo": [5, 6, 6, 1, 6] - }, "Testing inserting past the list length." - assert Add(["foo"], "bar").apply({"foo": "not bar"}) == { - "foo": "not bar" - }, "testing adding to a dict where the value already exists." - assert Add(["foo"], "bar").apply( - {"foo": "not bar"}, conflict_options="overwrite" - ) == { - "foo": "bar" - }, "testing an overwrite add to a dict where the value already exists." - assert Add(["foo", 1, "foo"], "bar").apply({"foo": [0, {}]}) == { - "foo": [0, {"foo": "bar"}] - } + ), "testing replacement with dict elements") + ] + +@pytest.mark.parametrize("before,after,expected,failure_message", DIFF_ORDERED_LISTS_TESTS) +def test_diff_ordered_list(before, after, expected, failure_message:str): + assert diff_ordered_list(before, after) == expected, failure_message + + +APPLY_ADD_TESTS = [ + (Add(["foo"], "bar"), ({}, "keep-old"), {"foo": "bar"},"testing simple application of Add on an empty dict"), + (Add(["foo", "foo"], "bar"), ({}, "keep-old"), {"foo": {"foo": "bar"}}, "testing simple adding of a dict-based value to an empty dict."), + (Add(["foo"], 1, "ol", 3), ({"foo": [5, 6, 2, 3, 4]}, "keep-old"), {"foo": [5, 6, 2, 1, 3, 4]},"Testing inserting a value into a list."), + (Add(["foo"], 1, "ul"),({"foo": [5, 6, 2, 3, 4]}, "keep-old"), {"foo": [5, 6, 2, 3, 4, 1]}, "Testing appending a value to a list."), + (Add(["foo"], [1, 2, 3, 4]), ({}, "keep-old"), {"foo":[1,2,3,4]}, "testing adding a key with a list value."), + (Add(["foo"], 6, "ol", 6), ({"foo": [5, 6, 6, 1]},"keep-old" ), {"foo": [5, 6, 6, 1 ,6]}, "testing insert past the index of the list's length"), + (Add(["foo"], "bar"), ({"foo": "not bar"}, "overwrite"), 
{"foo": "bar"}, "testing that overwrite alters a dict when the key already exists."), + ( Add(["foo", 1, "foo"], "bar"), ({"foo": [0, {}]}, "keep-old"), {"foo": [0, {"foo": "bar"}]}, "testing adding to a dictionary within a list.") +] +@pytest.mark.parametrize("change_data,args,expected,failure_message", APPLY_ADD_TESTS) +def test_apply_add(change_data: Change, args: tuple[dict, str], expected: dict, failure_message: str): + assert change_data.apply(*args) == expected, failure_message + + def test_apply_delete(): @@ -621,3 +609,17 @@ def invert_test(doc1: dict, doc2: dict): assert diff_dicts({"foo": "bar"}, {"foo": "foo"}).invert().apply( {"foo": "foo"} ) == {"foo": "bar"}, "Simple invert test." + + +#Alright, let's consider what total tests we might need. Here are the things that can go wrong: +# Let's define the different error possibilities. +# Error 1: The type is no longer applicable (relevant key has changed type. +# Error 2: Item no longer exists in the unordered list. +# Error 3: Item no longer has the right index in the ordered list. (DIFFICULT TO MANAGE, THIS WILL BE THE PROBLEM) +# HOW TO SOLVE THIS: keep track of the index shifting that occurs between version_to_undo and latest_version. +# What this means: I need a list of edits that map one number to another. So how am I going to do this? Does 'find shared sequences' work? It might. +# This error is also going to show up with complex nested lists/dicts, so how am I going to do this? I need a new stage in 'invert' that generates mappings: +# Mappings will be a way of mapping old indexes to new indexes. The new index can be null, so None can be an option. For example. +# [1,2, 3, 4] [2, 3, 4] is a mapping of {1: None, 2:0, 3:1, 4:2}. However, it'd be better if I find a way to describe it using ranges, for better performance. +# The mapping is not actually going to be a tool for matching the same item, but a way of describing to the list the shifts that have been made to other item's locations. 
+# This means that I will need a new object type, because lists won't actually cut it here. \ No newline at end of file From 04e0b1bd20bde61e47066c1352f55d26a4b4913a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 13:22:26 +0000 Subject: [PATCH 7/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sandbox.py | 167 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 118 insertions(+), 49 deletions(-) diff --git a/sandbox.py b/sandbox.py index a724d64da0d..c54aad27854 100644 --- a/sandbox.py +++ b/sandbox.py @@ -461,12 +461,13 @@ def diff_dicts( return change_list -def undo_arbitrary_version_changes(prior_version:dict, version_to_undo:dict, latest_version:dict): +def undo_arbitrary_version_changes( + prior_version: dict, version_to_undo: dict, latest_version: dict +): changeset_to_undo = diff_dicts(prior_version, version_to_undo).invert() return changeset_to_undo.apply(latest_version) - # [start here] @@ -481,50 +482,118 @@ def undo_arbitrary_version_changes(prior_version:dict, version_to_undo:dict, lat # TODO: Look into Pytest shorthand for applying tests to different values in an array, rather than manually entering each one. ([inputs], failure message) for example, as each cell. 
-DIFF_ORDERED_LISTS_TESTS = [ - ([1, 2, 3, 4], [1, 2, 4, 5], Patch([Delete([], 3, "ol", 2), Add([], 5, "ol", 3)]),"testing diff with a replacement of a value"), - ([], [1, 2, 3, 4], Patch( - [ - Add([], 1, "ol", 0), - Add([], 2, "ol", 1), - Add([], 3, "ol", 2), - Add([], 4, "ol", 3), - ]), "testing diff adding to an empty dict"), - ([1, 2, 3], [],Patch( - [Delete([], 1, "ol", 0), Delete([], 2, "ol", 0), Delete([], 3, "ol", 0)]), "Testing deleting all elements from a list."), - ([0, 1, 2], [2, 1, 0], Patch( - [ - Delete([], 1, "ol", 1), - Delete([], 2, "ol", 1), - Add([], 2, "ol", 0), - Add([], 1, "ol", 1), - ] - ), "testing rearrangement of elements" ), - ([{"1": "2"}], [{"2": "1"}], Patch( - [Delete([], {"1": "2"}, "ol", 0), Add([], {"2": "1"}, "ol", 0)] - ), "testing replacement with dict elements") - ] - -@pytest.mark.parametrize("before,after,expected,failure_message", DIFF_ORDERED_LISTS_TESTS) -def test_diff_ordered_list(before, after, expected, failure_message:str): +DIFF_ORDERED_LISTS_TESTS = [ + ( + [1, 2, 3, 4], + [1, 2, 4, 5], + Patch([Delete([], 3, "ol", 2), Add([], 5, "ol", 3)]), + "testing diff with a replacement of a value", + ), + ( + [], + [1, 2, 3, 4], + Patch( + [ + Add([], 1, "ol", 0), + Add([], 2, "ol", 1), + Add([], 3, "ol", 2), + Add([], 4, "ol", 3), + ] + ), + "testing diff adding to an empty dict", + ), + ( + [1, 2, 3], + [], + Patch([Delete([], 1, "ol", 0), Delete([], 2, "ol", 0), Delete([], 3, "ol", 0)]), + "Testing deleting all elements from a list.", + ), + ( + [0, 1, 2], + [2, 1, 0], + Patch( + [ + Delete([], 1, "ol", 1), + Delete([], 2, "ol", 1), + Add([], 2, "ol", 0), + Add([], 1, "ol", 1), + ] + ), + "testing rearrangement of elements", + ), + ( + [{"1": "2"}], + [{"2": "1"}], + Patch([Delete([], {"1": "2"}, "ol", 0), Add([], {"2": "1"}, "ol", 0)]), + "testing replacement with dict elements", + ), +] + + +@pytest.mark.parametrize( + "before,after,expected,failure_message", DIFF_ORDERED_LISTS_TESTS +) +def 
test_diff_ordered_list(before, after, expected, failure_message: str): assert diff_ordered_list(before, after) == expected, failure_message APPLY_ADD_TESTS = [ - (Add(["foo"], "bar"), ({}, "keep-old"), {"foo": "bar"},"testing simple application of Add on an empty dict"), - (Add(["foo", "foo"], "bar"), ({}, "keep-old"), {"foo": {"foo": "bar"}}, "testing simple adding of a dict-based value to an empty dict."), - (Add(["foo"], 1, "ol", 3), ({"foo": [5, 6, 2, 3, 4]}, "keep-old"), {"foo": [5, 6, 2, 1, 3, 4]},"Testing inserting a value into a list."), - (Add(["foo"], 1, "ul"),({"foo": [5, 6, 2, 3, 4]}, "keep-old"), {"foo": [5, 6, 2, 3, 4, 1]}, "Testing appending a value to a list."), - (Add(["foo"], [1, 2, 3, 4]), ({}, "keep-old"), {"foo":[1,2,3,4]}, "testing adding a key with a list value."), - (Add(["foo"], 6, "ol", 6), ({"foo": [5, 6, 6, 1]},"keep-old" ), {"foo": [5, 6, 6, 1 ,6]}, "testing insert past the index of the list's length"), - (Add(["foo"], "bar"), ({"foo": "not bar"}, "overwrite"), {"foo": "bar"}, "testing that overwrite alters a dict when the key already exists."), - ( Add(["foo", 1, "foo"], "bar"), ({"foo": [0, {}]}, "keep-old"), {"foo": [0, {"foo": "bar"}]}, "testing adding to a dictionary within a list.") + ( + Add(["foo"], "bar"), + ({}, "keep-old"), + {"foo": "bar"}, + "testing simple application of Add on an empty dict", + ), + ( + Add(["foo", "foo"], "bar"), + ({}, "keep-old"), + {"foo": {"foo": "bar"}}, + "testing simple adding of a dict-based value to an empty dict.", + ), + ( + Add(["foo"], 1, "ol", 3), + ({"foo": [5, 6, 2, 3, 4]}, "keep-old"), + {"foo": [5, 6, 2, 1, 3, 4]}, + "Testing inserting a value into a list.", + ), + ( + Add(["foo"], 1, "ul"), + ({"foo": [5, 6, 2, 3, 4]}, "keep-old"), + {"foo": [5, 6, 2, 3, 4, 1]}, + "Testing appending a value to a list.", + ), + ( + Add(["foo"], [1, 2, 3, 4]), + ({}, "keep-old"), + {"foo": [1, 2, 3, 4]}, + "testing adding a key with a list value.", + ), + ( + Add(["foo"], 6, "ol", 6), + ({"foo": [5, 6, 
6, 1]}, "keep-old"), + {"foo": [5, 6, 6, 1, 6]}, + "testing insert past the index of the list's length", + ), + ( + Add(["foo"], "bar"), + ({"foo": "not bar"}, "overwrite"), + {"foo": "bar"}, + "testing that overwrite alters a dict when the key already exists.", + ), + ( + Add(["foo", 1, "foo"], "bar"), + ({"foo": [0, {}]}, "keep-old"), + {"foo": [0, {"foo": "bar"}]}, + "testing adding to a dictionary within a list.", + ), ] -@pytest.mark.parametrize("change_data,args,expected,failure_message", APPLY_ADD_TESTS) -def test_apply_add(change_data: Change, args: tuple[dict, str], expected: dict, failure_message: str): - assert change_data.apply(*args) == expected, failure_message +@pytest.mark.parametrize("change_data,args,expected,failure_message", APPLY_ADD_TESTS) +def test_apply_add( + change_data: Change, args: tuple[dict, str], expected: dict, failure_message: str +): + assert change_data.apply(*args) == expected, failure_message def test_apply_delete(): @@ -611,15 +680,15 @@ def invert_test(doc1: dict, doc2: dict): ) == {"foo": "bar"}, "Simple invert test." -#Alright, let's consider what total tests we might need. Here are the things that can go wrong: -# Let's define the different error possibilities. -# Error 1: The type is no longer applicable (relevant key has changed type. +# Alright, let's consider what total tests we might need. Here are the things that can go wrong: +# Let's define the different error possibilities. +# Error 1: The type is no longer applicable (relevant key has changed type. # Error 2: Item no longer exists in the unordered list. # Error 3: Item no longer has the right index in the ordered list. (DIFFICULT TO MANAGE, THIS WILL BE THE PROBLEM) -# HOW TO SOLVE THIS: keep track of the index shifting that occurs between version_to_undo and latest_version. -# What this means: I need a list of edits that map one number to another. So how am I going to do this? Does 'find shared sequences' work? It might. 
-# This error is also going to show up with complex nested lists/dicts, so how am I going to do this? I need a new stage in 'invert' that generates mappings: -# Mappings will be a way of mapping old indexes to new indexes. The new index can be null, so None can be an option. For example. -# [1,2, 3, 4] [2, 3, 4] is a mapping of {1: None, 2:0, 3:1, 4:2}. However, it'd be better if I find a way to describe it using ranges, for better performance. -# The mapping is not actually going to be a tool for matching the same item, but a way of describing to the list the shifts that have been made to other item's locations. -# This means that I will need a new object type, because lists won't actually cut it here. \ No newline at end of file +# HOW TO SOLVE THIS: keep track of the index shifting that occurs between version_to_undo and latest_version. +# What this means: I need a list of edits that map one number to another. So how am I going to do this? Does 'find shared sequences' work? It might. +# This error is also going to show up with complex nested lists/dicts, so how am I going to do this? I need a new stage in 'invert' that generates mappings: +# Mappings will be a way of mapping old indexes to new indexes. The new index can be null, so None can be an option. For example. +# [1,2, 3, 4] [2, 3, 4] is a mapping of {1: None, 2:0, 3:1, 4:2}. However, it'd be better if I find a way to describe it using ranges, for better performance. +# The mapping is not actually going to be a tool for matching the same item, but a way of describing to the list the shifts that have been made to other item's locations. +# This means that I will need a new object type, because lists won't actually cut it here. From f5b287b8e45e253ebaf808bef164748480db593a Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Tue, 10 Sep 2024 19:05:54 -0400 Subject: [PATCH 8/9] Beginning considerations on Swap operation, further edits of testing. 
--- sandbox.py | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/sandbox.py b/sandbox.py index a724d64da0d..1029517b32d 100644 --- a/sandbox.py +++ b/sandbox.py @@ -99,7 +99,7 @@ def convert_to_hashable( class Change: path: list[str | int] - value: str | list | int = "" + value: str | list | int | tuple = "" list_type: Literal["ol", "ul", None] = None index: None | int = None @@ -136,8 +136,17 @@ def traverse_nested_dicts_and_lists( return (entry, path[-1]) +class Swap(Change): + + def __init__(self, path:list[int, str], values:tuple[int|list|dict|str, int|list|dict|str], indices: tuple[int, int]): + self.path = path, + self.values = values + self.indices = indices + super(self, path) # Inverting an Add means that it deletes. # Consider if In-Place/Out-Of-Place should be an input option. + + class Add(Change): # TODO: Create a tripartite set of options to replace overwrite. Error with conflict, keep old, or overwrite. @@ -255,11 +264,14 @@ def invert(self): return Add(self.path, self.value, self.list_type, self.index) + + + @dataclass class Patch: change_list: list[Change] = field(default_factory=list) - + # DONE THIS: Consider implementing Dataclass properly; look into how their constructors are defined. 
def __post_init__(self): self.change_list = [] if type(self.change_list) is None else self.change_list @@ -397,7 +409,7 @@ def diff_dicts( doc1: dict, doc2: dict, path: list[str | int] = [], - default_list_handler: Literal["ordered", "unordered"] = "unordered", + default_list_handler: Literal["ordered", "unordered"] = "ordered", overrides: dict[str, Literal['ordered', 'unordered']] | None = None, ): if not overrides: @@ -569,12 +581,11 @@ def test_diff_adds(): [Add(["foo", "foo"], "bar")] ), "Error in adding a key to a nested dict" assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3, 4, 5, 6]}) == Patch( - [Add(["foo"], 5, "ul"), Add(["foo"], 6, "ul")] + [Add(["foo"], 5, "ol",4 ), Add(["foo"], 6, "ol", 5)] ), "Error in adding values to an array." assert diff_dicts( {"foo": [1, 2, 3, 4]}, {"foo": [1, 1, 1, 1, 2, 3, 4]}, - overrides={"foo": "ordered"}, ) == Patch( [Add(["foo"], 1, "ol", 1), Add(["foo"], 1, "ol", 2), Add(["foo"], 1, "ol", 3)] ), "Error in tracking the number of duplicate values added." @@ -599,17 +610,27 @@ def test_diff_deletes(): [Delete(["foo"], 2, "ul"), Delete(["foo"], 4, "ul")] ), "error in deleting a non-final element in a list." +INVERT_TESTS = [ + ({"foo": "foo"}, {"foo": "bar"}, "basic case"), + ({"foo":"bar"}, {"foo":["bar"]}, "change of type"), + ({"foo": [1,2,3,4]}, {"foo": [4, 3, 2, 1]}, "checking reorder of a list ") -def test_inverts(): +] +@pytest.mark.parametrize("before, after,failure_message", INVERT_TESTS) +def test_inverts(before:dict, after:dict, failure_message:str): - def invert_test(doc1: dict, doc2: dict): - diff = diff_dicts(doc1, doc2) - return diff.invert().apply(doc2) == doc1 + assert diff_dicts(before, after).invert().apply( + after + ) == before, failure_message - assert diff_dicts({"foo": "bar"}, {"foo": "foo"}).invert().apply( - {"foo": "foo"} - ) == {"foo": "bar"}, "Simple invert test." 
+FINAL_TESTS = [ + ({"foo":"bar"}, {"foo": "not bar"}, {"ana": "kata", "foo": "not_bar"}, {"ana": "kata", "foo":"bar"},"basic case"), + ({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3, 5]}, {"foo": [4, 9, 10, 5]}, {"foo": [4, 9, 10, 4]}, "testing case of list edits w/o adjusting size"), +] +@pytest.mark.parametrize("before, after,latest, expected, failure_message", FINAL_TESTS) +def test_final(before:dict, after:dict, latest:dict, expected:dict, failure_message:str): + assert diff_dicts(before, after).invert().apply(latest) == expected, failure_message #Alright, let's consider what total tests we might need. Here are the things that can go wrong: # Let's define the different error possibilities. From 928bd7fabe494cf7c418183e579fc4e3b360b26b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 10 Sep 2024 23:07:02 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sandbox.py | 61 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/sandbox.py b/sandbox.py index 8df17d82ded..46093582045 100644 --- a/sandbox.py +++ b/sandbox.py @@ -138,11 +138,18 @@ def traverse_nested_dicts_and_lists( class Swap(Change): - def __init__(self, path:list[int, str], values:tuple[int|list|dict|str, int|list|dict|str], indices: tuple[int, int]): - self.path = path, + def __init__( + self, + path: list[int, str], + values: tuple[int | list | dict | str, int | list | dict | str], + indices: tuple[int, int], + ): + self.path = (path,) self.values = values self.indices = indices super(self, path) + + # Inverting an Add means that it deletes. # Consider if In-Place/Out-Of-Place should be an input option. 
@@ -264,14 +271,11 @@ def invert(self): return Add(self.path, self.value, self.list_type, self.index) - - - @dataclass class Patch: change_list: list[Change] = field(default_factory=list) - + # DONE THIS: Consider implementing Dataclass properly; look into how their constructors are defined. def __post_init__(self): self.change_list = [] if type(self.change_list) is None else self.change_list @@ -650,7 +654,7 @@ def test_diff_adds(): [Add(["foo", "foo"], "bar")] ), "Error in adding a key to a nested dict" assert diff_dicts({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3, 4, 5, 6]}) == Patch( - [Add(["foo"], 5, "ol",4 ), Add(["foo"], 6, "ol", 5)] + [Add(["foo"], 5, "ol", 4), Add(["foo"], 6, "ol", 5)] ), "Error in adding values to an array." assert diff_dicts( {"foo": [1, 2, 3, 4]}, @@ -679,28 +683,45 @@ def test_diff_deletes(): [Delete(["foo"], 2, "ul"), Delete(["foo"], 4, "ul")] ), "error in deleting a non-final element in a list." + INVERT_TESTS = [ ({"foo": "foo"}, {"foo": "bar"}, "basic case"), - ({"foo":"bar"}, {"foo":["bar"]}, "change of type"), - ({"foo": [1,2,3,4]}, {"foo": [4, 3, 2, 1]}, "checking reorder of a list ") - + ({"foo": "bar"}, {"foo": ["bar"]}, "change of type"), + ({"foo": [1, 2, 3, 4]}, {"foo": [4, 3, 2, 1]}, "checking reorder of a list "), ] -@pytest.mark.parametrize("before, after,failure_message", INVERT_TESTS) -def test_inverts(before:dict, after:dict, failure_message:str): - assert diff_dicts(before, after).invert().apply( - after - ) == before, failure_message -FINAL_TESTS = [ - ({"foo":"bar"}, {"foo": "not bar"}, {"ana": "kata", "foo": "not_bar"}, {"ana": "kata", "foo":"bar"},"basic case"), - ({"foo": [1, 2, 3, 4]}, {"foo": [1, 2, 3, 5]}, {"foo": [4, 9, 10, 5]}, {"foo": [4, 9, 10, 4]}, "testing case of list edits w/o adjusting size"), +@pytest.mark.parametrize("before, after,failure_message", INVERT_TESTS) +def test_inverts(before: dict, after: dict, failure_message: str): + + assert diff_dicts(before, after).invert().apply(after) == before, 
failure_message + +FINAL_TESTS = [ + ( + {"foo": "bar"}, + {"foo": "not bar"}, + {"ana": "kata", "foo": "not_bar"}, + {"ana": "kata", "foo": "bar"}, + "basic case", + ), + ( + {"foo": [1, 2, 3, 4]}, + {"foo": [1, 2, 3, 5]}, + {"foo": [4, 9, 10, 5]}, + {"foo": [4, 9, 10, 4]}, + "testing case of list edits w/o adjusting size", + ), ] -@pytest.mark.parametrize("before, after,latest, expected, failure_message", FINAL_TESTS) -def test_final(before:dict, after:dict, latest:dict, expected:dict, failure_message:str): + + +@pytest.mark.parametrize("before, after,latest, expected, failure_message", FINAL_TESTS) +def test_final( + before: dict, after: dict, latest: dict, expected: dict, failure_message: str +): assert diff_dicts(before, after).invert().apply(latest) == expected, failure_message + # Alright, let's consider what total tests we might need. Here are the things that can go wrong: # Let's define the different error possibilities. # Error 1: The type is no longer applicable (relevant key has changed type.