From cdb3810ac5259f77731eab22a23713d01b116ba7 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 Apr 2022 13:59:03 -0500 Subject: [PATCH 1/2] feat: QueryList: Filter lists of dictionaries w/ nested support --- docs/contributing/internals.md | 5 + libvcs/utils/__init__.py | 0 libvcs/utils/query_list.py | 219 ++++++++++++++++++++++++++++++ tests/utils/test_query_list.py | 240 +++++++++++++++++++++++++++++++++ 4 files changed, 464 insertions(+) create mode 100644 libvcs/utils/__init__.py create mode 100644 libvcs/utils/query_list.py create mode 100644 tests/utils/test_query_list.py diff --git a/docs/contributing/internals.md b/docs/contributing/internals.md index c456efb6..f797467b 100644 --- a/docs/contributing/internals.md +++ b/docs/contributing/internals.md @@ -18,3 +18,8 @@ stability policy. .. autoapimodule:: libvcs.types :members: ``` + +```{eval-rst} +.. autoapimodule:: libvcs.utils.query_list + :members: +``` diff --git a/libvcs/utils/__init__.py b/libvcs/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libvcs/utils/query_list.py b/libvcs/utils/query_list.py new file mode 100644 index 00000000..6c6456df --- /dev/null +++ b/libvcs/utils/query_list.py @@ -0,0 +1,219 @@ +import re +import traceback +from typing import Any, Callable, Optional, Protocol, Sequence, TypeVar, Union + +T = TypeVar("T", Any, Any) + + +def keygetter(obj, path): + """obj, "foods__breakfast", obj['foods']['breakfast'] + + >>> keygetter({ "foods": { "breakfast": "cereal" } }, "foods__breakfast") + 'cereal' + >>> keygetter({ "foods": { "breakfast": "cereal" } }, "foods") + {'breakfast': 'cereal'} + + """ + try: + sub_fields = path.split("__") + dct = obj + for sub_field in sub_fields: + dct = dct[sub_field] + return dct + except Exception as e: + traceback.print_exception(e) + return None + + +def parse_lookup(obj, path, lookup): + """Check if field lookup key, e.g. "my__path__contains" has comparator, return val. + + If comparator not used or value not found, return None. + + mykey__endswith("mykey") -> "mykey" else None + + >>> parse_lookup({ "food": "red apple" }, "food__istartswith", "__istartswith") + 'red apple' + """ + try: + if path.endswith(lookup): + if field_name := path.rsplit(lookup)[0]: + return keygetter(obj, field_name) + except Exception as e: + traceback.print_exception(e) + return None + + +class LookupProtocol(Protocol): + """Protocol for :class:`QueryList` filtering operators.""" + + def __call__(self, data: Union[list[str], str], rhs: Union[list[str], str]): + """Callback for :class:`QueryList` filtering operators.""" + + +def lookup_exact(data, rhs): + return rhs == data + + +def lookup_iexact(data, rhs): + return rhs.lower() == data.lower() + + +def lookup_contains(data, rhs): + return rhs in data + + +def lookup_icontains(data, rhs): + return rhs.lower() in data.lower() + + +def lookup_startswith(data, rhs): + return data.startswith(rhs) + + +def lookup_istartswith(data, rhs): + return data.lower().startswith(rhs.lower()) + + +def lookup_endswith(data, rhs): + return data.endswith(rhs) + + +def lookup_iendswith(data, rhs): + return data.lower().endswith(rhs.lower()) + + +def lookup_in(data, rhs): + if isinstance(rhs, list): + return data in rhs + return rhs in data + + +def lookup_nin(data, rhs): + if isinstance(rhs, list): + return data not in rhs + return rhs not in data + + +def lookup_regex(data, rhs): + return re.search(rhs, data) + + +def lookup_iregex(data, rhs): + return re.search(rhs, data, re.IGNORECASE) + + +LOOKUP_NAME_MAP: dict[str, LookupProtocol] = { + "eq": lookup_exact, + "exact": lookup_exact, + "iexact": lookup_iexact, + "contains": lookup_contains, + "icontains": lookup_icontains, + "startswith": lookup_startswith, + "istartswith": lookup_istartswith, + "endswith": lookup_endswith, + "iendswith": lookup_iendswith, + "in": lookup_in, + "nin": lookup_nin, + "regex": lookup_regex, + "iregex": lookup_iregex, +} + + +class QueryList(list[T]): + """Filter list of object/dicts. For small, local datasets. *Experimental, unstable*. + + >>> query = QueryList( + ... [ + ... { + ... "place": "Largo", + ... "city": "Tampa", + ... "state": "Florida", + ... "foods": {"fruit": ["banana", "orange"], "breakfast": "cereal"}, + ... }, + ... { + ... "place": "Chicago suburbs", + ... "city": "Elmhurst", + ... "state": "Illinois", + ... "foods": {"fruit": ["apple", "cantelope"], "breakfast": "waffles"}, + ... }, + ... ] + ... ) + >>> query.filter(place="Chicago suburbs")[0]['city'] + 'Elmhurst' + >>> query.filter(place__icontains="chicago")[0]['city'] + 'Elmhurst' + >>> query.filter(foods__breakfast="waffles")[0]['city'] + 'Elmhurst' + >>> query.filter(foods__fruit__in="cantelope")[0]['city'] + 'Elmhurst' + >>> query.filter(foods__fruit__in="orange")[0]['city'] + 'Tampa' + """ + + data: Sequence[T] + + def items(self): + data: Sequence[T] + + if self.pk_key is None: + raise Exception("items() require a pk_key exists") + return [(getattr(item, self.pk_key), item) for item in self] + + def __eq__(self, other): + data = other + + if not isinstance(self, list) or not isinstance(data, list): + return False + + if len(self) == len(data): + for (a, b) in zip(self, data): + if isinstance(a, dict): + a_keys = a.keys() + if a.keys == b.keys(): + for key in a_keys: + if abs(a[key] - b[key]) > 1: + return False + else: + if a != b: + return False + + return True + return False + + def filter(self, matcher: Optional[Union[Callable[[T], bool], T]] = None, **kwargs): + def filter_lookup(obj) -> bool: + for path, v in kwargs.items(): + try: + lhs, op = path.rsplit("__", 1) + + if op not in LOOKUP_NAME_MAP: + raise ValueError(f"{op} not in LOOKUP_NAME_MAP") + except ValueError: + lhs = path + op = "exact" + + assert op in LOOKUP_NAME_MAP + path = lhs + data = keygetter(obj, path) + + if not LOOKUP_NAME_MAP[op](data, v): + return False + + return True + + if callable(matcher): + _filter = matcher + elif matcher is not None: + + def val_match(obj): + if isinstance(matcher, list): + return obj in matcher + else: + return obj == matcher + + _filter = val_match + else: + _filter = filter_lookup + + return self.__class__(k for k in self if _filter(k)) diff --git a/tests/utils/test_query_list.py b/tests/utils/test_query_list.py new file mode 100644 index 00000000..6e0da2b1 --- /dev/null +++ b/tests/utils/test_query_list.py @@ -0,0 +1,240 @@ +from typing import Optional + +import pytest + +from libvcs.utils.query_list import QueryList + + +@pytest.mark.parametrize( + "items,filter_expr,expected_result", + [ + [[{"test": 1}], None, [{"test": 1}]], + [[{"test": 1}], None, QueryList([{"test": 1}])], + [[{"fruit": "apple"}], None, QueryList([{"fruit": "apple"}])], + [ + [{"fruit": "apple", "banana": object()}], + None, + QueryList([{"fruit": "apple", "banana": object()}]), + ], + [ + [{"fruit": "apple", "banana": object()}], + dict(fruit__eq="apple"), + QueryList([{"fruit": "apple", "banana": object()}]), + ], + [ + [{"fruit": "apple", "banana": object()}], + dict(fruit__eq="notmatch"), + QueryList([]), + ], + [ + [{"fruit": "apple", "banana": object()}], + dict(fruit__exact="apple"), + QueryList([{"fruit": "apple", "banana": object()}]), + ], + [ + [{"fruit": "apple", "banana": object()}], + dict(fruit__exact="notmatch"), + QueryList([]), + ], + [ + [{"fruit": "apple", "banana": object()}], + dict(fruit__iexact="Apple"), + QueryList([{"fruit": "apple", "banana": object()}]), + ], + [ + [{"fruit": "apple", "banana": object()}], + dict(fruit__iexact="Notmatch"), + QueryList([]), + ], + [ + [{"fruit": "apple", "banana": object()}], + dict(fruit="notmatch"), + QueryList([]), + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit="apple"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__in="app"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__icontains="App"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__contains="app"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__regex=r"app.*"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__iregex=r"App.*"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__startswith="a"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__istartswith="AP"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__startswith="z"), + [], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__endswith="le"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__iendswith="LE"), + [{"fruit": "apple"}], + ], + [ + [{"fruit": "apple"}, {"fruit": "mango"}], + dict(fruit__endswith="z"), + [], + ], + [ + [ + {"fruit": "apple"}, + {"fruit": "mango"}, + {"fruit": "banana"}, + {"fruit": "kiwi"}, + ], + dict(fruit__in=["apple", "mango"]), + [{"fruit": "apple"}, {"fruit": "mango"}], + ], + [ + [ + {"fruit": "apple"}, + {"fruit": "mango"}, + {"fruit": "banana"}, + {"fruit": "kiwi"}, + ], + dict(fruit__nin=["apple", "mango"]), + [{"fruit": "banana"}, {"fruit": "kiwi"}], + ], + [ + [ + {"place": "book store", "city": "Tampa", "state": "Florida"}, + {"place": "coffee shop", "city": "Tampa", "state": "Florida"}, + { + "place": "chinese restaurant", + "city": "ybor city", + "state": "Florida", + }, + { + "place": "walt disney world", + "city": "Lake Buena Vista", + "state": "Florida", + }, + ], + dict(city="Tampa", state="Florida"), + [ + {"place": "book store", "city": "Tampa", "state": "Florida"}, + {"place": "coffee shop", "city": "Tampa", "state": "Florida"}, + ], + ], + [ + [ + {"place": "book store", "city": "Tampa", "state": "Florida"}, + {"place": "coffee shop", "city": "Tampa", "state": "Florida"}, + { + "place": "chinese restaurant", + "city": "ybor city", + "state": "Florida", + }, + { + "place": "walt disney world", + "city": "Lake Buena Vista", + "state": "Florida", + }, + ], + dict(place__contains="coffee", state="Florida"), + [ + {"place": "coffee shop", "city": "Tampa", "state": "Florida"}, + ], + ], + [ + [ + { + "place": "Largo", + "city": "Tampa", + "state": "Florida", + "foods": {"fruit": ["banana", "orange"], "breakfast": "cereal"}, + }, + { + "place": "Chicago suburbs", + "city": "Elmhurst", + "state": "Illinois", + "foods": {"fruit": ["apple", "cantelope"], "breakfast": "waffles"}, + }, + ], + dict(foods__fruit__contains="banana"), + [ + { + "place": "Largo", + "city": "Tampa", + "state": "Florida", + "foods": {"fruit": ["banana", "orange"], "breakfast": "cereal"}, + }, + ], + ], + [ + [ + { + "place": "Largo", + "city": "Tampa", + "state": "Florida", + "foods": {"fruit": ["banana", "orange"], "breakfast": "cereal"}, + }, + { + "place": "Chicago suburbs", + "city": "Elmhurst", + "state": "Illinois", + "foods": {"fruit": ["apple", "cantelope"], "breakfast": "waffles"}, + }, + ], + dict(foods__breakfast="cereal"), + [ + { + "place": "Largo", + "city": "Tampa", + "state": "Florida", + "foods": {"fruit": ["banana", "orange"], "breakfast": "cereal"}, + }, + ], + ], + [[1, 2, 3, 4, 5], None, QueryList([1, 2, 3, 4, 5])], + [[1, 2, 3, 4, 5], [1], QueryList([1])], + [[1, 2, 3, 4, 5], [1, 4], QueryList([1, 4])], + [[1, 2, 3, 4, 5], lambda val: 1 == val, QueryList([1])], + [[1, 2, 3, 4, 5], lambda val: 2 == val, QueryList([2])], + ], +) +def test_filter(items: list, filter_expr: Optional[dict], expected_result: list): + qs = QueryList(items) + if filter_expr is not None: + if isinstance(filter_expr, dict): + assert qs.filter(**filter_expr) == expected_result + else: + assert qs.filter(filter_expr) == expected_result + else: + assert qs.filter() == expected_result From c1649fe0bf95b0ef554846ed1f2fa569f155d26d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 Apr 2022 13:59:03 -0500 Subject: [PATCH 2/2] feat: QueryList: Filter lists of dictionaries w/ nested support --- libvcs/utils/query_list.py | 38 +++++++++++++++++++++++----------- tests/utils/test_query_list.py | 2 +- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/libvcs/utils/query_list.py b/libvcs/utils/query_list.py index 6c6456df..4d0a4c50 100644 --- a/libvcs/utils/query_list.py +++ b/libvcs/utils/query_list.py @@ -1,6 +1,7 @@ +import dataclasses import re import traceback -from typing import Any, Callable, Optional, Protocol, Sequence, TypeVar, Union +from typing import Any, Callable, Generic, Optional, Protocol, Sequence, TypeVar, Union T = TypeVar("T", Any, Any) @@ -120,9 +121,13 @@ def lookup_iregex(data, rhs): } -class QueryList(list[T]): +@dataclasses.dataclass(eq=False) +class QueryList(Generic[T]): """Filter list of object/dicts. For small, local datasets. *Experimental, unstable*. + :py:func:`dataclasses.dataclass` is only used for ``__repr__`` and pytest comparison + details. + >>> query = QueryList( ... [ ... { @@ -139,35 +144,44 @@ class QueryList(list[T]): ... }, ... ] ... ) - >>> query.filter(place="Chicago suburbs")[0]['city'] + >>> query.filter(place="Chicago suburbs").data[0]['city'] 'Elmhurst' - >>> query.filter(place__icontains="chicago")[0]['city'] + >>> query.filter(place__icontains="chicago").data[0]['city'] 'Elmhurst' - >>> query.filter(foods__breakfast="waffles")[0]['city'] + >>> query.filter(foods__breakfast="waffles").data[0]['city'] 'Elmhurst' - >>> query.filter(foods__fruit__in="cantelope")[0]['city'] + >>> query.filter(foods__fruit__in="cantelope").data[0]['city'] 'Elmhurst' - >>> query.filter(foods__fruit__in="orange")[0]['city'] + >>> query.filter(foods__fruit__in="orange").data[0]['city'] 'Tampa' """ + __slots__ = ("data", "pk_key") data: Sequence[T] + # def __init__(self, data, pk_key: Optional[str] = None): + # self.data: Sequence[T] = data + # #: Primary key for objects, optional. + # #: Use for .get(), .items() + # self.pk_key: Optional[Any] = pk_key + def items(self): data: Sequence[T] if self.pk_key is None: raise Exception("items() require a pk_key exists") - return [(getattr(item, self.pk_key), item) for item in self] + return [(getattr(item, self.pk_key), item) for item in self.data] def __eq__(self, other): data = other + if hasattr(data, "data"): + data = getattr(data, "data") - if not isinstance(self, list) or not isinstance(data, list): + if not isinstance(self.data, list) or not isinstance(data, list): return False - if len(self) == len(data): - for (a, b) in zip(self, data): + if len(self.data) == len(data): + for (a, b) in zip(self.data, data): if isinstance(a, dict): a_keys = a.keys() if a.keys == b.keys(): @@ -216,4 +230,4 @@ def val_match(obj): else: _filter = filter_lookup - return self.__class__(k for k in self if _filter(k)) + return self.__class__(data=[k for k in self.data if _filter(k)]) diff --git a/tests/utils/test_query_list.py b/tests/utils/test_query_list.py index 6e0da2b1..a2b597cb 100644 --- a/tests/utils/test_query_list.py +++ b/tests/utils/test_query_list.py @@ -230,7 +230,7 @@ ], ) def test_filter(items: list, filter_expr: Optional[dict], expected_result: list): - qs = QueryList(items) + qs = QueryList(data=items) if filter_expr is not None: if isinstance(filter_expr, dict): assert qs.filter(**filter_expr) == expected_result