Skip to content

Commit

Permalink
Merge pull request #6 from x-tabdeveloping/levenshtein
Browse files Browse the repository at this point in the history
Levenshtein
  • Loading branch information
x-tabdeveloping authored Sep 6, 2024
2 parents 2f3e7ab + 9de1df9 commit f338b5d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
10 changes: 9 additions & 1 deletion neofuzz/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ def char_ngram_process(
ngram_range: Tuple[int, int] = (1, 5),
tf_idf: bool = True,
metric: str = "cosine",
refine_levenshtein: bool = False,
) -> Process:
"""Basic character n-gram based fuzzy search process.
Expand All @@ -416,6 +417,11 @@ def char_ngram_process(
Flag signifying whether the features should be tf-idf weighted.
metric: str, default 'cosine'
Distance metric to use for fuzzy search.
refine_levenshtein: bool, default False
Indicates whether results should be refined with Levenshtein distance
using TheFuzz.
This can increase the accuracy of your results.
The flag is passed on to the returned Process.
Returns
-------
Expand All @@ -426,4 +432,6 @@ def char_ngram_process(
vectorizer = TfidfVectorizer(ngram_range=ngram_range, analyzer="char")
else:
vectorizer = CountVectorizer(ngram_range=ngram_range, analyzer="char")
return Process(vectorizer, metric=metric)
return Process(
vectorizer, metric=metric, refine_levenshtein=refine_levenshtein
)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
line-length=79
[tool.poetry]
name = "neofuzz"
version = "0.3.0"
version = "0.3.1"
description = "Blazing fast fuzzy text search for Python."
authors = ["Márton Kardos <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit f338b5d

Please sign in to comment.