Skip to content

Commit

Permalink
v0.1.12
Browse files Browse the repository at this point in the history
add spacy tokenizer
add warning when sequence length exceeds the maximum
  • Loading branch information
urchadezaratiana authored and urchadezaratiana committed Apr 24, 2024
1 parent bc540ad commit dd5099d
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
2 changes: 1 addition & 1 deletion gliner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.1.11"
__version__ = "0.1.12"

from .model import GLiNER

Expand Down
6 changes: 4 additions & 2 deletions gliner/modules/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import random
import warnings
from collections import defaultdict
from typing import List, Tuple, Dict

import torch
from torch import nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
import random


class InstructBase(nn.Module):
Expand All @@ -29,6 +29,8 @@ def preprocess_spans(self, tokens, ner, classes_to_id):
max_len = self.base_config.max_len

if len(tokens) > max_len:
# add warning to say that sentence has been truncated
warnings.warn(f"Sentence of length {len(tokens)} has been truncated to {max_len}")
length = max_len
tokens = tokens[:max_len]
else:
Expand Down

0 comments on commit dd5099d

Please sign in to comment.