From d72bb35638693f034286236c8942e71be6e06605 Mon Sep 17 00:00:00 2001 From: Jack Cushman Date: Thu, 4 Apr 2024 18:14:19 -0400 Subject: [PATCH] Add compatibility for hyperscan 0.7.7 --- eyecite/tokenizers.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/eyecite/tokenizers.py b/eyecite/tokenizers.py index ffe61dd..b7dbba7 100644 --- a/eyecite/tokenizers.py +++ b/eyecite/tokenizers.py @@ -520,7 +520,20 @@ def convert_regex(regex): cache_dir.mkdir(exist_ok=True) cache = cache_dir / fingerprint if cache.exists(): - hyperscan_db = hyperscan.loadb(cache.read_bytes()) + cache_bytes = cache.read_bytes() + try: + # hyperscan >= 0.5.0 added a mandatory mode argument + hyperscan_db = hyperscan.loadb( + cache_bytes, mode=hyperscan.HS_MODE_BLOCK + ) + except TypeError: + hyperscan_db = hyperscan.loadb(cache_bytes) + try: + # at some point Scratch became necessary -- + # https://github.com/darvid/python-hyperscan/issues/50#issuecomment-1386243477 + hyperscan_db.scratch = hyperscan.Scratch(hyperscan_db) + except AttributeError: + pass if not hyperscan_db: # No cache, so compile database.