From dd1bbb3a5d51c4ddfaada0a72d2d7bf4f07c4f4b Mon Sep 17 00:00:00 2001 From: cal066 Date: Wed, 26 Jul 2023 05:00:06 +0000 Subject: [PATCH 1/6] ctransformers: another attempt Generalized ctransformers based on: https://github.com/oobabooga/text-generation-webui/pull/2892 Credits to randoentity --- modules/ctransformers_model.py | 76 +++++++++++++++++++++++ modules/loaders.py | 109 +++++++++++++++++++++++---------- modules/models.py | 22 ++++++- modules/shared.py | 2 +- modules/text_generation.py | 7 +-- modules/ui_model_menu.py | 5 +- requirements.txt | 3 + 7 files changed, 183 insertions(+), 41 deletions(-) create mode 100644 modules/ctransformers_model.py diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py new file mode 100644 index 0000000000..7bce57d9fa --- /dev/null +++ b/modules/ctransformers_model.py @@ -0,0 +1,76 @@ +from ctransformers import AutoModelForCausalLM +from ctransformers import AutoConfig + +from modules import shared +from modules.callbacks import Iteratorize +from modules.logging_colors import logger + +class CtransformersModel: + def __init__(self): + pass + + @classmethod + def from_pretrained(self, path): + result = self() + stops = shared.settings['custom_stopping_strings'] + stops.append("<|end|>") + + # ctransformers uses -1 for random seed + config = AutoConfig.from_pretrained( + str(path), + stop=stops, + threads=shared.args.threads, + gpu_layers=shared.args.n_gpu_layers, + batch_size=shared.args.n_batch, + stream=not shared.args.no_stream, + seed=(-1 if shared.args.llama_cpp_seed == 0 else shared.args.llama_cpp_seed) + ) + self.model = AutoModelForCausalLM.from_pretrained( + str(result.model_dir(path) if result.model_type_is_auto() else path), + model_type=(None if result.model_type_is_auto() else shared.args.model_type), + config=config + ) + logger.info(f'Using ctransformers model_type: {self.model.model_type} for {self.model.model_path}') + return result, result + + def model_type_is_auto(self): + return shared.args.model_type == "Auto" or shared.args.model_type == "None" + + def model_dir(self, path): + if path.is_file(): + return path.parent + return path + + def encode(self, string, **kwargs): + return self.model.tokenize(string) + + def decode(self, ids): + return self.model.detokenize(ids) + + + def generate(self, prompt, state, callback=None): + prompt = prompt if type(prompt) is str else prompt.decode() + generator = self.model._stream( + prompt=prompt, + max_new_tokens=state['max_new_tokens'], + temperature=state['temperature'], + top_p=state['top_p'], + top_k=state['top_k'], + repetition_penalty=state['repetition_penalty'], + threads=shared.args.threads + ) + + output = "" + for token in generator: + if callback: + callback(token) + output += token + return output + + + def generate_with_streaming(self, *args, **kwargs): + with Iteratorize(self.generate, args, kwargs, callback=None) as generator: + reply = '' + for token in generator: + reply += token + yield reply diff --git a/modules/loaders.py b/modules/loaders.py index 07bc455cea..1addde44d3 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -1,10 +1,43 @@ import functools +from collections import OrderedDict import gradio as gr from modules import shared -loaders_and_params = { +loaders_and_params = OrderedDict({ + 'Transformers': [ + 'cpu_memory', + 'gpu_memory', + 'trust_remote_code', + 'load_in_8bit', + 'bf16', + 'cpu', + 'disk', + 'auto_devices', + 'load_in_4bit', + 'use_double_quant', + 'quant_type', + 'compute_dtype', + 'trust_remote_code', + 
'alpha_value', + 'compress_pos_emb', + 'transformers_info' + ], + 'ExLlama_HF': [ + 'gpu_split', + 'max_seq_len', + 'alpha_value', + 'compress_pos_emb', + 'exllama_HF_info', + ], + 'ExLlama': [ + 'gpu_split', + 'max_seq_len', + 'alpha_value', + 'compress_pos_emb', + 'exllama_info', + ], 'AutoGPTQ': [ 'triton', 'no_inject_fused_attention', @@ -59,39 +92,17 @@ 'cpu', 'llamacpp_HF_info', ], - 'Transformers': [ - 'cpu_memory', - 'gpu_memory', - 'trust_remote_code', - 'load_in_8bit', - 'bf16', - 'cpu', - 'disk', - 'auto_devices', - 'load_in_4bit', - 'use_double_quant', - 'quant_type', - 'compute_dtype', - 'trust_remote_code', - 'alpha_value', - 'compress_pos_emb', - 'transformers_info' - ], - 'ExLlama': [ - 'gpu_split', - 'max_seq_len', - 'alpha_value', - 'compress_pos_emb', - 'exllama_info', - ], - 'ExLlama_HF': [ - 'gpu_split', - 'max_seq_len', - 'alpha_value', - 'compress_pos_emb', - 'exllama_HF_info', + 'ctransformers': [ + 'n_ctx', + 'n_gpu_layers', + 'n_batch', + 'threads', + 'no_mmap', + 'mlock', + 'model_type', + 'llama_cpp_seed', ] -} +}) loaders_samplers = { 'Transformers': { @@ -256,6 +267,13 @@ 'skip_special_tokens', 'auto_max_new_tokens', }, + 'ctransformers': { + 'temperature', + 'top_p', + 'top_k', + 'repetition_penalty', + 'seed' + } } @@ -276,6 +294,31 @@ def blacklist_samplers(loader): else: return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers] +model_loader_type_table = { + 'GPTQ-for-LLaMa': [ + "None", + "llama", + "opt", + "gptj" + ], + 'ctransformers': [ + "None", + "gpt2", + "gptj", + "gptneox", + "llama", + "mpt", + "dollyv2" + "replit", + "starcoder", + "falcon" + ], +} + +def model_loader_type(loader): + if loader in model_loader_type_table: + return model_loader_type_table[loader] + return ["None"] def get_gpu_memory_keys(): return [k for k in shared.gradio if k.startswith('gpu_memory')] diff --git a/modules/models.py b/modules/models.py index aad142c18c..f09f71291f 100644 --- a/modules/models.py +++ b/modules/models.py @@ -58,7 +58,8 @@ def load_model(model_name, loader=None): 'llamacpp_HF': llamacpp_HF_loader, 'RWKV': RWKV_loader, 'ExLlama': ExLlama_loader, - 'ExLlama_HF': ExLlama_HF_loader + 'ExLlama_HF': ExLlama_HF_loader, + 'ctransformers': CtransformorsModel_loader, } p = Path(model_name) @@ -268,6 +269,25 @@ def llamacpp_HF_loader(model_name): return model, tokenizer +def CtransformorsModel_loader(model_name): + from modules.ctransformers_model import CtransformersModel + + path = Path(f'{shared.args.model_dir}/{model_name}') + logger.info(f'ctransformers loading: {path}\n') + ctrans = CtransformersModel() + if ctrans.model_type_is_auto(): + model_file = path + else: + if path.is_file(): + model_file = path + else: + model_file = list( + Path(f'{shared.args.model_dir}/{model_name}').glob('*.bin') + )[0] + logger.info(f'ctransformers weights detected: {model_file}\n') + model, tokenizer = ctrans.from_pretrained(model_file) + return model, tokenizer + def GPTQ_loader(model_name): # Monkey patch diff --git a/modules/shared.py b/modules/shared.py index 224fa6aafa..2246439e8b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -34,7 +34,7 @@ 'autoload_model': False, 'max_new_tokens': 200, 'max_new_tokens_min': 1, - 'max_new_tokens_max': 4096, + 'max_new_tokens_max': 8000, 'auto_max_new_tokens': False, 'seed': -1, 'negative_prompt': '', diff --git a/modules/text_generation.py b/modules/text_generation.py index 6e95414bbd..56e13dc6b0 100644 --- a/modules/text_generation.py +++ 
b/modules/text_generation.py @@ -41,7 +41,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False): yield '' return - if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel']: + if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel', 'CtransformersModel']: generate_func = generate_reply_custom else: generate_func = generate_reply_HF @@ -88,9 +88,8 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False): yield reply - def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None): - if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel']: + if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'CtransformersModel']: input_ids = shared.tokenizer.encode(str(prompt)) input_ids = np.array(input_ids).reshape(1, len(input_ids)) else: @@ -104,7 +103,7 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt if truncation_length is not None: input_ids = input_ids[:, -truncation_length:] - if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel'] or shared.args.cpu: + if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel', 'CtransformersModel'] or shared.args.cpu: return input_ids elif shared.args.deepspeed: return input_ids.to(device=local_rank) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 0c1042f68a..44389d2f77 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -63,7 +63,7 @@ def create_ui(): with gr.Row(): with gr.Column(): - shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=["Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value=None) + shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None) with gr.Box(): with gr.Row(): with gr.Column(): @@ -84,7 +84,7 @@ def create_ui(): shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None") shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None") - shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gptj"], value=shared.args.model_type or "None") + shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None"], value=shared.args.model_type or "None") shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0) shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from LLaMA.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. 
Example: 20,7,7') @@ -128,6 +128,7 @@ def create_ui(): def create_event_handlers(): shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) + shared.gradio['loader'].change(fn=lambda value: gr.update(choices=loaders.model_loader_type(value)), inputs=shared.gradio['loader'], outputs=shared.gradio['model_type']) # In this event handler, the interface state is read and updated # with the model defaults (if any), and then the model is loaded diff --git a/requirements.txt b/requirements.txt index b27e14c5bc..ec6a7e470c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,3 +40,6 @@ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/text # GPTQ-for-LLaMa https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" + +# ctransformers +https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.20+cu117-py3-none-any.whl From dbfba02437a3afabbf9a705c6e3ded84c0a11f09 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 10:21:08 -0700 Subject: [PATCH 2/6] Some adjustments --- modules/ctransformers_model.py | 9 ++----- modules/loaders.py | 47 ++++++++++++++++++---------------- modules/models.py | 1 + modules/shared.py | 4 ++- modules/text_generation.py | 1 + modules/ui_model_menu.py | 2 +- 6 files changed, 33 insertions(+), 31 deletions(-) diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index 7bce57d9fa..cb6074d8ac 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -1,10 +1,10 @@ -from ctransformers import AutoModelForCausalLM -from ctransformers import AutoConfig +from ctransformers import AutoConfig, AutoModelForCausalLM from modules import shared from modules.callbacks import Iteratorize from modules.logging_colors import logger + class CtransformersModel: def __init__(self): pass @@ -12,13 +12,10 @@ def __init__(self): @classmethod def from_pretrained(self, path): result = self() - stops = shared.settings['custom_stopping_strings'] - stops.append("<|end|>") # ctransformers uses -1 for random seed config = AutoConfig.from_pretrained( str(path), - stop=stops, threads=shared.args.threads, gpu_layers=shared.args.n_gpu_layers, batch_size=shared.args.n_batch, @@ -47,7 +44,6 @@ def encode(self, string, **kwargs): def decode(self, ids): return self.model.detokenize(ids) - def generate(self, prompt, state, callback=None): prompt = prompt if type(prompt) is str else prompt.decode() generator = self.model._stream( @@ -67,7 +63,6 @@ def generate(self, prompt, state, callback=None): output += token return output - def generate_with_streaming(self, *args, **kwargs): with Iteratorize(self.generate, args, kwargs, callback=None) as generator: reply = '' diff --git a/modules/loaders.py b/modules/loaders.py index 1addde44d3..db20b8ea7e 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -276,25 +276,7 @@ } } - -@functools.cache -def list_all_samplers(): - all_samplers = set() - for k in loaders_samplers: - for sampler in loaders_samplers[k]: - all_samplers.add(sampler) - - return sorted(all_samplers) - - -def blacklist_samplers(loader): - all_samplers = list_all_samplers() - if loader 
== 'All': - return [gr.update(visible=True) for sampler in all_samplers] - else: - return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers] - -model_loader_type_table = { +loaders_model_types = { 'GPTQ-for-LLaMa': [ "None", "llama", @@ -315,11 +297,32 @@ def blacklist_samplers(loader): ], } -def model_loader_type(loader): - if loader in model_loader_type_table: - return model_loader_type_table[loader] + +@functools.cache +def list_all_samplers(): + all_samplers = set() + for k in loaders_samplers: + for sampler in loaders_samplers[k]: + all_samplers.add(sampler) + + return sorted(all_samplers) + + +def blacklist_samplers(loader): + all_samplers = list_all_samplers() + if loader == 'All': + return [gr.update(visible=True) for sampler in all_samplers] + else: + return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers] + + +def get_model_types(loader): + if loader in loaders_model_types: + return loaders_model_types[loader] + return ["None"] + def get_gpu_memory_keys(): return [k for k in shared.gradio if k.startswith('gpu_memory')] diff --git a/modules/models.py b/modules/models.py index f09f71291f..c2cd73b4bc 100644 --- a/modules/models.py +++ b/modules/models.py @@ -288,6 +288,7 @@ def CtransformorsModel_loader(model_name): model, tokenizer = ctrans.from_pretrained(model_file) return model, tokenizer + def GPTQ_loader(model_name): # Monkey patch diff --git a/modules/shared.py b/modules/shared.py index 2246439e8b..cb6f0ae16e 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -34,7 +34,7 @@ 'autoload_model': False, 'max_new_tokens': 200, 'max_new_tokens_min': 1, - 'max_new_tokens_max': 8000, + 'max_new_tokens_max': 4096, 'auto_max_new_tokens': False, 'seed': -1, 'negative_prompt': '', @@ -215,6 +215,8 @@ def fix_loader_name(name): return 'ExLlama' elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']: return 'ExLlama_HF' + elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']: + return 'ctransformers' def add_extension(name): diff --git a/modules/text_generation.py b/modules/text_generation.py index 56e13dc6b0..30e81355e9 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -88,6 +88,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False): yield reply + def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None): if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'CtransformersModel']: input_ids = shared.tokenizer.encode(str(prompt)) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 44389d2f77..147759469c 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -128,7 +128,7 @@ def create_ui(): def create_event_handlers(): shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) - shared.gradio['loader'].change(fn=lambda value: gr.update(choices=loaders.model_loader_type(value)), inputs=shared.gradio['loader'], outputs=shared.gradio['model_type']) + shared.gradio['loader'].change(fn=lambda value: gr.update(choices=loaders.get_model_types(value)), inputs=shared.gradio['loader'], outputs=shared.gradio['model_type']) # In this event handler, the interface state is read and updated # with the model defaults (if any), and then the model is loaded From d27dc7678be9e4b100d2be9b9b008f87a7e4dc7c Mon Sep 17 00:00:00 2001 
From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 10:24:45 -0700 Subject: [PATCH 3/6] Update README --- README.md | 2 +- modules/ctransformers_model.py | 2 ++ modules/models.py | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ad2ad1edb4..8ceb077c16 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features * 3 interface modes: default, notebook, and chat -* Multiple model backends: transformers, llama.cpp, ExLlama, AutoGPTQ, GPTQ-for-LLaMa +* Multiple model backends: transformers, llama.cpp, ExLlama, AutoGPTQ, GPTQ-for-LLaMa, ctransformers * Dropdown menu for quickly switching between different models * LoRA: load and unload LoRAs on the fly, train a new LoRA * Precise instruction templates for chat mode, including Llama 2, Alpaca, Vicuna, WizardLM, StableLM, and many others diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index cb6074d8ac..1dbe073abf 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -22,11 +22,13 @@ def from_pretrained(self, path): stream=not shared.args.no_stream, seed=(-1 if shared.args.llama_cpp_seed == 0 else shared.args.llama_cpp_seed) ) + self.model = AutoModelForCausalLM.from_pretrained( str(result.model_dir(path) if result.model_type_is_auto() else path), model_type=(None if result.model_type_is_auto() else shared.args.model_type), config=config ) + logger.info(f'Using ctransformers model_type: {self.model.model_type} for {self.model.model_path}') return result, result diff --git a/modules/models.py b/modules/models.py index c2cd73b4bc..a172b29cf1 100644 --- a/modules/models.py +++ b/modules/models.py @@ -273,7 +273,7 @@ def CtransformorsModel_loader(model_name): from modules.ctransformers_model import CtransformersModel path = Path(f'{shared.args.model_dir}/{model_name}') - logger.info(f'ctransformers loading: {path}\n') + logger.info(f'ctransformers loading: {path}') ctrans = CtransformersModel() if ctrans.model_type_is_auto(): model_file = path @@ -281,10 +281,9 @@ def CtransformorsModel_loader(model_name): if path.is_file(): model_file = path else: - model_file = list( - Path(f'{shared.args.model_dir}/{model_name}').glob('*.bin') - )[0] + model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*.bin'))[0] logger.info(f'ctransformers weights detected: {model_file}\n') + model, tokenizer = ctrans.from_pretrained(model_file) return model, tokenizer From a8a683fe98ed08e2cf6abd4558149a6a7ef0340a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 10:35:56 -0700 Subject: [PATCH 4/6] Several fixes --- modules/ctransformers_model.py | 5 ++++- modules/loaders.py | 1 - modules/models.py | 9 ++++----- modules/ui_model_menu.py | 5 +++-- modules/ui_parameters.py | 2 +- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index 1dbe073abf..b3d001d3e3 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -19,7 +19,7 @@ def from_pretrained(self, path): threads=shared.args.threads, gpu_layers=shared.args.n_gpu_layers, batch_size=shared.args.n_batch, - stream=not shared.args.no_stream, + stream=True, seed=(-1 if shared.args.llama_cpp_seed == 0 else shared.args.llama_cpp_seed) ) @@ -38,6 +38,7 @@ def model_type_is_auto(self): def model_dir(self, path): if 
path.is_file(): return path.parent + return path def encode(self, string, **kwargs): @@ -62,7 +63,9 @@ def generate(self, prompt, state, callback=None): for token in generator: if callback: callback(token) + output += token + return output def generate_with_streaming(self, *args, **kwargs): diff --git a/modules/loaders.py b/modules/loaders.py index db20b8ea7e..e60f6260e3 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -272,7 +272,6 @@ 'top_p', 'top_k', 'repetition_penalty', - 'seed' } } diff --git a/modules/models.py b/modules/models.py index a172b29cf1..d60aecd080 100644 --- a/modules/models.py +++ b/modules/models.py @@ -59,7 +59,7 @@ def load_model(model_name, loader=None): 'RWKV': RWKV_loader, 'ExLlama': ExLlama_loader, 'ExLlama_HF': ExLlama_HF_loader, - 'ctransformers': CtransformorsModel_loader, + 'ctransformers': ctransformers_loader, } p = Path(model_name) @@ -243,7 +243,7 @@ def llamacpp_loader(model_name): else: model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*ggml*.bin'))[0] - logger.info(f"llama.cpp weights detected: {model_file}\n") + logger.info(f"llama.cpp weights detected: {model_file}") model, tokenizer = LlamaCppModel.from_pretrained(model_file) return model, tokenizer @@ -269,11 +269,10 @@ def llamacpp_HF_loader(model_name): return model, tokenizer -def CtransformorsModel_loader(model_name): +def ctransformers_loader(model_name): from modules.ctransformers_model import CtransformersModel path = Path(f'{shared.args.model_dir}/{model_name}') - logger.info(f'ctransformers loading: {path}') ctrans = CtransformersModel() if ctrans.model_type_is_auto(): model_file = path @@ -282,8 +281,8 @@ def CtransformorsModel_loader(model_name): model_file = path else: model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*.bin'))[0] - logger.info(f'ctransformers weights detected: {model_file}\n') + logger.info(f'ctransformers weights detected: {model_file}') model, tokenizer = ctrans.from_pretrained(model_file) return model, tokenizer diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 147759469c..61905ca65a 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -127,8 +127,9 @@ def create_ui(): def create_event_handlers(): - shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) - shared.gradio['loader'].change(fn=lambda value: gr.update(choices=loaders.get_model_types(value)), inputs=shared.gradio['loader'], outputs=shared.gradio['model_type']) + shared.gradio['loader'].change( + loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())).then( + lambda value: gr.update(choices=loaders.get_model_types(value)), shared.gradio['loader'], shared.gradio['model_type']) # In this event handler, the interface state is read and updated # with the model defaults (if any), and then the model is loaded diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 75bce9b10a..4b9fb91832 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -16,7 +16,7 @@ def create_ui(default_preset): shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button') with gr.Column(): - shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All", "Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value="All", elem_classes='slim-dropdown') + shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", 
choices=["All"] + list(loaders.loaders_and_params.keys()), value="All", elem_classes='slim-dropdown') with gr.Row(): with gr.Column(): From e2a7c611598f23d19edd572edb28d10aba4ffa45 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 10:37:40 -0700 Subject: [PATCH 5/6] Change syntax --- modules/ui_model_menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 61905ca65a..7b852a443b 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -129,7 +129,7 @@ def create_ui(): def create_event_handlers(): shared.gradio['loader'].change( loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())).then( - lambda value: gr.update(choices=loaders.get_model_types(value)), shared.gradio['loader'], shared.gradio['model_type']) + lambda value: gr.update(choices=loaders.get_model_types(value)), gradio('loader'), gradio('model_type')) # In this event handler, the interface state is read and updated # with the model defaults (if any), and then the model is loaded From 8f3ea057794388438e1aa027b18c2d63de9d8466 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 10:39:09 -0700 Subject: [PATCH 6/6] Remove unused params --- modules/loaders.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/loaders.py b/modules/loaders.py index e60f6260e3..fa5e03c288 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -97,8 +97,6 @@ 'n_gpu_layers', 'n_batch', 'threads', - 'no_mmap', - 'mlock', 'model_type', 'llama_cpp_seed', ]
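For reference, a minimal standalone sketch of the loading and streaming flow that the new ctransformers loader wires up above. This is illustrative only, not part of the patches: the model path models/example.ggmlv3.q4_0.bin and the parameter values are hypothetical, and it assumes the ctransformers 0.2.20 wheel added to requirements.txt. It mirrors what CtransformersModel.from_pretrained() and generate() do, but through the library's public __call__ interface instead of the private _stream() helper.

    from ctransformers import AutoConfig, AutoModelForCausalLM

    # Mirrors CtransformersModel.from_pretrained(): seed=-1 means "random" in
    # ctransformers, and stream=True enables token-by-token generation.
    config = AutoConfig.from_pretrained(
        "models/example.ggmlv3.q4_0.bin",  # hypothetical local GGML file
        threads=8,
        gpu_layers=0,
        batch_size=512,
        stream=True,
        seed=-1,
    )

    model = AutoModelForCausalLM.from_pretrained(
        "models/example.ggmlv3.q4_0.bin",  # hypothetical local GGML file
        model_type="llama",  # or None to let ctransformers try to detect it, as the "Auto"/"None" setting does
        config=config,
    )

    # Roughly what CtransformersModel.generate() produces for the UI: iterate over
    # streamed tokens and accumulate them into the reply string.
    output = ""
    for token in model(
        "Hello, my name is",
        max_new_tokens=64,
        temperature=0.7,
        top_p=0.9,
        top_k=40,
        repetition_penalty=1.1,
        stream=True,
    ):
        output += token
    print(output)

In the webui itself, the same values come from shared.args (threads, n_gpu_layers, n_batch, llama_cpp_seed) and from the generation state dict, and the streamed tokens are fed back through Iteratorize in generate_with_streaming().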