Skip to content

Commit

Permalink
fix: mypy
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Jul 25, 2024
1 parent b70fb1b commit 8b97dca
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions alexi/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from allennlp_light.modules.conditional_random_field import (
ConditionalRandomFieldWeightTrans,
)
+ from tokenizers import Tokenizer # type: ignore
from torch import nn
from torch.nn.utils.rnn import (
PackedSequence,
Expand All @@ -34,9 +35,6 @@
DEFAULT_RNN_MODEL = Path(__file__).parent / "models" / "rnn.pt"
FeatureFunc = Callable[[Sequence[T_obj]], Iterator[list[str]]]

- if False:
- from tokenizers import Tokenizer # type: ignore


class Bullet(Enum):
NUMERIC = re.compile(r"^(\d+)[\)\.°-]$")
Expand Down Expand Up @@ -74,7 +72,7 @@ def structure_features(page: Iterable[T_obj]) -> Iterator[list[str]]:
def layout_features(page: Iterable[T_obj]) -> Iterator[list[str]]:
"""Traits de mise en page pour entrainement d'un modèle."""
# Split page into lines
- lines = list(line_breaks(page))
+ lines = list(line_breaks(list(page)))
prev_line_features: dict[str, int] = {}
for line in lines:
page_height = int(line[0]["page_height"])
Expand Down Expand Up @@ -352,7 +350,7 @@ def add_deltas(page):


def make_rnn_features(
- page: Iterable[T_obj],
+ page: Sequence[T_obj],
labels: str = "literal",
) -> tuple[list[T_obj], list[str]]:
crf_features = list(
Expand Down Expand Up @@ -476,7 +474,7 @@ def make_rnn_data(
label_counts = Counter(itertools.chain.from_iterable(y))
id2label = sorted(label_counts.keys(), reverse=True)
label2id = dict((label, idx) for (idx, label) in enumerate(id2label))
- feat2count = {name: Counter() for name in FEATNAMES}
+ feat2count: dict[str, Counter] = {name: Counter() for name in FEATNAMES}
if tokenizer is not None:
# FIXME: should use all tokens
feat2count["token"] = Counter()
Expand Down

0 comments on commit 8b97dca

Please sign in to comment.