Skip to content

Commit

Permalink
Implement auto_max_new_tokens for ExLlama
Browse files Browse the repository at this point in the history
  • Loading branch information
oobabooga committed Aug 2, 2023
1 parent e931844 commit 32a2bbe
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
6 changes: 5 additions & 1 deletion modules/exllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,15 @@ def generate_with_streaming(self, prompt, state):
# Tokenizing the input
ids = self.generator.tokenizer.encode(prompt)
ids = ids[:, -get_max_prompt_length(state):]
+ if state['auto_max_new_tokens']:
+ max_new_tokens = state['truncation_length'] - ids.shape[-1]
+ else:
+ max_new_tokens = state['max_new_tokens']

self.generator.gen_begin_reuse(ids)
initial_len = self.generator.sequence[0].shape[0]
has_leading_space = False
- for i in range(state['max_new_tokens']):
+ for i in range(max_new_tokens):
token = self.generator.gen_single_token()
if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
has_leading_space = True
Expand Down
1 change: 1 addition & 0 deletions modules/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@
'repetition_penalty_range',
'seed',
'ban_eos_token',
+ 'auto_max_new_tokens',
},
'AutoGPTQ': {
'temperature',
Expand Down

0 comments on commit 32a2bbe

Please sign in to comment.