Skip to content

Commit

Permalink
refactor: use "python-vendorize" for 3rd-party libs
Browse files Browse the repository at this point in the history
Signed-off-by: Jack Cherng <[email protected]>
  • Loading branch information
jfcherng committed Oct 31, 2024
1 parent 65047cb commit bc85d18
Show file tree
Hide file tree
Showing 12 changed files with 171 additions and 206 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.dist-info/ export-ignore
*.pyi export-ignore
.dependabot export-ignore
.editorconfig export-ignore
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Desktop.ini
Thumbs.db

# Python
*.dist-info/
*.py[cod]
.mypy_cache/
.ruff_cache/
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ pip-compile:
uv pip compile --upgrade requirements-dev.in -o requirements-dev.txt
uv pip compile --upgrade requirements-docs.in -o requirements-docs.txt

.PHONY: vendorize
vendorize:
python-vendorize

.PHONY: ci-check
ci-check:
@echo "========== check: mypy =========="
Expand Down
Empty file added plugin/_vendor/__init__.py
Empty file.
File renamed without changes.
19 changes: 10 additions & 9 deletions plugin/libs/trie/__init__.py → plugin/_vendor/trie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from typing import Dict, Generator, Iterable
"""
A Trie/Prefix Tree is a kind of search tree used to provide quick lookup
of words/patterns in a set of words. A basic Trie however has O(n^2) space complexity
making it impractical in practice. It however provides O(max(search_string, length of
longest word)) lookup time making it an optimal approach when space is not an issue.
@see https://github.com/TheAlgorithms/Python/blob/master/data_structures/trie/trie.py
This file has been modified by @jfcherng to fit his own use.
"""

class TrieNode:
"""
A Trie/Prefix Tree is a kind of search tree used to provide quick lookup
of words/patterns in a set of words. A basic Trie however has O(n^2) space complexity
making it impractical in practice. It however provides O(max(search_string, length of
longest word)) lookup time making it an optimal approach when space is not an issue.
from typing import Dict, Generator, Iterable

This file has been modified by @jfcherng to fit his own use.
"""

class TrieNode:
def __init__(self) -> None:
self.nodes: Dict[str, TrieNode] = dict() # Mapping from char to TrieNode
self.is_leaf = False
Expand Down
146 changes: 146 additions & 0 deletions plugin/_vendor/triegex/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import collections
import collections.abc

__all__ = ('Triegex',)

OR = r'|'

# regex below matches nothing https://stackoverflow.com/a/940840/2183102. We
# use '~' to ensure it comes last when lexicographically sorted:
# max(string.printable) is '~'
NOTHING = r'~^(?#match nothing)'
GROUP = r'(?:{0})'
WORD_BOUNDARY = r'\b'


class TriegexNode:

def __init__(self, char: str, end: bool, *childrens):
self.char = char if char is not None else ''
self.end = end
self.childrens = {children.char: children for children in childrens}

def __iter__(self):
return iter(sorted(self.childrens.values(), key=lambda x: x.char))

def __len__(self):
return len(self.childrens)

def __repr__(self):
return f'<TriegexNode: \'{self.char}\' end={self.end}>'

def __contains__(self, key):
return key in self.childrens

def __getitem__(self, key):
return self.childrens[key]

def __delitem__(self, key):
del self.childrens[key]

def to_regex(self):
stack = [self]
ready = []
waiting = []

while stack:
waiting.append(stack.pop())
stack.extend(waiting[-1])

while waiting:
node = waiting.pop()
result = node.char

if node.end:
result += WORD_BOUNDARY

# if there is only one children, we can safely concatenate chars
# withoug nesting
elif len(node) == 1:
result += ready.pop()

elif len(node) > 1:
result += GROUP.format(OR.join(reversed(
[ready.pop() for _ in node]
)))

ready.append(result)
return ready[-1]


class Triegex(collections.MutableSet):

_root = None

def __init__(self, *words):
"""
Trigex constructor.
"""

# make sure we match nothing when no words are added
self._root = TriegexNode(None, False, TriegexNode(NOTHING, False))

for word in words:
self.add(word)

def add(self, word: str):
current = self._root
for letter in word[:-1]:
current = current.childrens.setdefault(letter,
TriegexNode(letter, False))
# this will ensure that we correctly match the word boundary
current.childrens[word[-1]] = TriegexNode(word[-1], True)

def to_regex(self):
r"""
Produce regular expression that will match each word in the
internal trie.
>>> t = Triegex('foo', 'bar', 'baz')
>>> t.to_regex()
'(?:ba(?:r\\b|z\\b)|foo\\b|~^(?#match nothing))'
"""
return self._root.to_regex()

def _traverse(self):
stack = [self._root]
current = self._root
while stack:
yield current
current = stack.pop()
stack.extend(current.childrens.values())

def __iter__(self):
paths = {self._root.char: []}
for node in self._traverse():
for children in node:
paths[children.char] = [node.char] + paths[node.char]
if children.end:
char = children.char
yield ''.join(reversed([char] + paths[char]))

def __len__(self):
return sum(1 for _ in self.__iter__())

def __contains__(self, word):
current = self._root
for char in word:
if char not in current:
return False
current = current[char]
return True and current.end # word has to end with the last char

def discard(self, word):
to_delete = [self._root]
current = self._root
for char in word:
if char not in current:
return
current = current[char]
to_delete.append(current)
if not to_delete[-1].end:
return
while len(to_delete) > 1:
node = to_delete.pop()
if len(node) == 0:
del to_delete[-1][node.char]
return
21 changes: 0 additions & 21 deletions plugin/libs/triegex/LICENSE

This file was deleted.

170 changes: 0 additions & 170 deletions plugin/libs/triegex/__init__.py

This file was deleted.

Loading

0 comments on commit bc85d18

Please sign in to comment.