Skip to content

Commit

Permalink
Hint Generator type
Browse files Browse the repository at this point in the history
  • Loading branch information
LoganDark committed Jun 4, 2023
1 parent 8ccb10a commit 20f0c2b
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions tokenizer/rwkv_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def printTokens(self, tokens):
# Tokenizer #4 (fast) https://github.com/LoganDark
########################################################################################################

from typing import Generator
from ast import literal_eval

class FastTokenizer:
Expand Down Expand Up @@ -255,7 +256,7 @@ def next_token(self, src: bytes) -> int:
break
return last_token

def encode_bytes(self, src: bytes) -> list[int]:
def encode_bytes(self, src: bytes) -> Generator[int, None, None]:
start, stop = 0, len(src)
while start < stop:
last_token, last = None, self.root
Expand All @@ -274,7 +275,7 @@ def encode_bytes(self, src: bytes) -> list[int]:
# Decode a sequence of token ids into raw bytes: every id in `tokens` is
# looked up through self.tok2val.__getitem__ and the looked-up values are
# concatenated with b''.join into one bytes object.
# NOTE(review): tok2val is defined outside this view; presumably it maps
# token id -> bytes value — confirm against the constructor.
def decode_bytes(self, tokens: list[int]) -> bytes:
return b''.join(map(self.tok2val.__getitem__, tokens))

def encode(self, src: str) -> list[int]:
def encode(self, src: str) -> Generator[int, None, None]:
return self.encode_bytes(src.encode('utf-8'))

def decode(self, tokens: list[int]) -> str:
Expand Down

0 comments on commit 20f0c2b

Please sign in to comment.