From bb7ef7823d048e5b6df44c6221d96fcb9390e0d5 Mon Sep 17 00:00:00 2001 From: DARREN OBERST Date: Fri, 22 Mar 2024 03:46:40 -0400 Subject: [PATCH] adding new slim models to catalog and supporting methods --- llmware/agents.py | 95 +++++++++++- llmware/model_configs.py | 315 +++++++++++++++++++++++++++------------ llmware/models.py | 315 +++++++++++++++++++++++++++++++++------ 3 files changed, 575 insertions(+), 150 deletions(-) diff --git a/llmware/agents.py b/llmware/agents.py index b55617db..bff633ee 100644 --- a/llmware/agents.py +++ b/llmware/agents.py @@ -393,8 +393,8 @@ def analyze_responses(self, key,value): def load_tool(self, tool_type, # new options added - use_gpu=True, sample=True, get_logits=False, - max_output=100, temperature=-99): + use_gpu=True, sample=False, get_logits=True, + max_output=100, temperature=0.0): """ Loads a single tool """ @@ -442,11 +442,14 @@ def unload_tool(self, tool_type): self.write_to_journal(journal_update) model = getattr(self, tool_type + "_model") - model.unload_model() - delattr(self, tool_type + "_model") - setattr(self, tool_type + "_model", None) - gc.collect() + if model: + + model.unload_model() + + delattr(self, tool_type + "_model") + setattr(self, tool_type + "_model", None) + gc.collect() return 0 @@ -513,6 +516,10 @@ def exec_function_call(self, tool_type, text=None, function="classify", params=N dict_output = True self.report[work_iter] = self.report[work_iter] | response["llm_response"] + elif response["usage"]["type"] == "list" and tool_type == "summary": + dict_output = True + self.report[work_iter] = self.report[work_iter] | {"summary": response["llm_response"]} + else: logging.warning("update: could not automatically convert to dictionary - " "keeping as string output") @@ -745,6 +752,79 @@ def category(self, text=None, params=None): return self.exec_function_call("category", text=text, params=params) + def sa_ner(self, text=None, params=None): + + """ Generates a dictionary with keys corresponding to 'sentiment' and 'named entity recognition' (NER) + identifiers in the text, such as people, organizations, and places. """ + + if not params: + # default parameter key + params = ["sentiment, people, organization, place"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("sa-ner", text=text, params=params) + + def extract(self, text=None, params=None): + + """ Extract receives an input of a text passage and a custom parameter key, and generates a dictionary with + a key corresponding to the 'custom parameter' key and a list of values associated with that key, extracted from + the text passage. """ + + if not params: + # default parameter key + params = ["key points"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("extract", text=text, params=params) + + def xsum(self, text=None, params=None): + + """ XSum or 'extreme summarization' receives an input text passage, and returns a dictionary with an 'xsum' + key and a value of a list with one string element, with the string element consisting of a short phrase, + title, or headline that provides a concise summary of the text passage.
""" + + if not params: + # default parameter key + params = ["xsum"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("xsum", text=text, params=params) + + def summarize(self, text=None, params=None): + + """ Summarizes receives an input text passage, and optional parameters to guide the summarization, and + returns a list of summary points from the text. """ + + if not params: + # default parameter key + params = ["key points (3)"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("summary", text=text, params=params) + + def boolean(self, text=None, params=None): + + """ Boolean receives an input text passage, a yes/no question as its parameter, and then returns a + dictionary with two keys - 'answer' and 'explain' with the 'answer' providing a yes/no classification, and the + explanation providing text from the passage that was used as the basis for the classification. """ + + if not params: + #TODO: what is right way to handle - needs params + params = ["Is this true?"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("boolean", text=text, params=params) + def nli(self, text1, text2, params=None): """ Executes a natural language inference classification on a text, if passed directly, or will pull current @@ -1194,6 +1274,7 @@ def create_new_table(self, output, table_name): for i, entry in enumerate(header_row): col_name = re.sub("[\xfe\xff]","",entry) try: + #TODO: build more robust type checking, e.g., float/decimal/currency test_int = int(test_row[i]) type="integer" except: @@ -1264,7 +1345,7 @@ def create_new_table_from_csv(self,fp=None, fn=None, table_name=None): logging.info("update: table created - column names - %s ", column_names) else: - print("update: table exists - getting column names") + logging.info("update: table exists - getting column names") column_names = self.get_column_names(table_name) # insert records diff --git a/llmware/model_configs.py b/llmware/model_configs.py index d7086c0b..a6392cc1 100644 --- a/llmware/model_configs.py +++ b/llmware/model_configs.py @@ -202,10 +202,10 @@ # --this can be configured and over-ridden if you prefer to use the full 200K window {"model_name": 'claude-3-opus-20240229', "display_name": "Anthropic-Claude-3-Opus", "model_family": "ClaudeModel", - "model_category": "generative-api", "model_location": "api", "context_window": 8192}, + "model_category": "generative-api", "model_location": "api", "context_window": 8192}, {"model_name": 'claude-3-sonnet-20240229', "display_name": "Anthropic-Claude-3-Sonnet", "model_family": "ClaudeModel", - "model_category": "generative-api", "model_location": "api", "context_window": 8192}, + "model_category": "generative-api", "model_location": "api", "context_window": 8192}, {"model_name": 'claude-2.1', "display_name": "Anthropic Claude-2.1", "model_family": "ClaudeModel", "model_category": "generative-api", "model_location": "api", "context_window": 8192}, @@ -213,8 +213,9 @@ {"model_name": 'claude-2.0', "display_name": "Anthropic Claude-Claude2-.0", "model_family": "ClaudeModel", "model_category": "generative-api", "model_location": "api", "context_window": 8192}, - {"model_name": 'command-medium-nightly', "display_name": "Cohere Command Medium", "model_family": "CohereGenModel", + {"model_name": 'command-medium-nightly', "display_name": "Cohere Command Medium", "model_family": "CohereGenModel", "model_category": "generative-api","model_location": "api", "context_window": 2048}, + 
{"model_name": 'command-xlarge-nightly', "display_name": "Cohere Command XLarge", "model_family": "CohereGenModel", "model_category": "generative-api","model_location": "api", "context_window": 2048}, @@ -414,7 +415,7 @@ "gguf_file": "dragon-mistral-7b-q4_k_m.gguf", "gguf_repo": "llmware/dragon-mistral-7b-v0", "link": "https://huggingface.co/llmware/dragon-mistral-7b-v0", - "custom_model_files": ["dragon-mistral-7b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-mistral-7b-v0"}, + "custom_model_files": [], "custom_model_repo": ""}, # deprecated access to dragon-llama-7b-gguf -> replaced by dragon-llama-answer-tool {"model_name": "llmware/dragon-llama-7b-gguf", "display_name": "dragon-llama-7b-gguf", @@ -424,7 +425,7 @@ "gguf_file": "dragon-llama-7b-q4_k_m.gguf", "gguf_repo": "llmware/dragon-llama-7b-v0", "link": "https://huggingface.co/llmware/dragon-llama-7b-v0", - "custom_model_files": ["dragon-llama-7b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-llama-7b-v0"}, + "custom_model_files": [], "custom_model_repo": ""}, # deprecated access to dragon-yi-6b-gguf -> replaced by dragon-yi-answer-tool {"model_name": "llmware/dragon-yi-6b-gguf", "display_name": "dragon-yi-6b-gguf", @@ -434,7 +435,7 @@ "gguf_file": "dragon-yi-6b-q4_k_m.gguf", "gguf_repo": "llmware/dragon-yi-6b-v0", "link": "https://huggingface.co/llmware/dragon-yi-6b-v0", - "custom_model_files": ["dragon-yi-6b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-yi-6b-v0"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "dragon-yi-answer-tool", "display_name": "dragon-yi-6b-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -444,7 +445,7 @@ "gguf_repo": "llmware/dragon-yi-answer-tool", "snapshot": True, "link": "https://huggingface.co/llmware/dragon-yi-answer-tool", - "custom_model_files": ["dragon-yi.gguf"], "custom_model_repo": "llmware/dragon-yi-answer-tool"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "dragon-llama-answer-tool", "display_name": "dragon-llama-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -454,7 +455,7 @@ "gguf_repo": "llmware/dragon-llama-answer-tool", "snapshot": True, "link": "https://huggingface.co/llmware/dragon-llama-answer-tool", - "custom_model_files": ["dragon-llama.gguf"], "custom_model_repo": "llmware/dragon-llama-answer-tool"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "dragon-mistral-answer-tool", "display_name": "dragon-mistral-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -464,7 +465,7 @@ "gguf_repo": "llmware/dragon-mistral-answer-tool", "snapshot": True, "link": "https://huggingface.co/llmware/dragon-mistral-answer-tool", - "custom_model_files": ["dragon-mistral.gguf"], "custom_model_repo": "llmware/dragon-mistral-answer-tool"}, + "custom_model_files": [], "custom_model_repo": ""}, # selected top HF open source chat models - gguf {"model_name": "TheBloke/Llama-2-7B-Chat-GGUF", "display_name": "llama-2-7b-chat-gguf", @@ -474,7 +475,7 @@ "gguf_file": "llama-2-7b-chat.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["llama-2-7b-chat.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", 
"display_name": "openhermes-mistral-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -483,7 +484,7 @@ "gguf_file": "openhermes-2.5-mistral-7b.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["openhermes-2.5-mistral-7b.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "TheBloke/zephyr-7B-beta-GGUF", "display_name": "zephyr-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -492,7 +493,7 @@ "gguf_file": "zephyr-7b-beta.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["zephyr-7b-beta.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "TheBloke/Starling-LM-7B-alpha-GGUF", "display_name": "starling-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -501,24 +502,22 @@ "gguf_file": "starling-lm-7b-alpha.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["starling-lm-7b-alpha.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon" - }, + "custom_model_files": [], "custom_model_repo": ""}, # new slim models {"model_name": "slim-ner-tool", "display_name": "slim-ner-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-ner.gguf", "gguf_repo": "llmware/slim-ner-tool", "link": "https://huggingface.co/llmware/slim-ner-tool", - "custom_model_files": ["slim-ner.gguf"], "custom_model_repo": "llmware/slim-ner-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["people", "location", "organization", "misc"], "fc_output_values": [], "tokenizer": "llmware/slim-ner", - "value_zone_markers": {"start": [6024,6796, 3366], "stop": [2033,3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, @@ -526,17 +525,16 @@ {"model_name": "slim-sentiment-tool", "display_name": "slim-sentiment-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-sentiment.gguf", "gguf_repo": "llmware/slim-sentiment-tool", "link": "https://huggingface.co/llmware/slim-sentiment-tool", - "custom_model_files": ["slim-sentiment.gguf"], "custom_model_repo": "llmware/slim-sentiment-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["sentiment"], "fc_output_values": ["positive", "neutral", "negative"], "tokenizer": "llmware/slim-sentiment", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], 
"marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, "function": ["classify"], @@ -545,12 +543,12 @@ {"model_name": "slim-emotions-tool", "display_name": "slim-emotions-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-emotions.gguf", "gguf_repo": "llmware/slim-emotions-tool", "link": "https://huggingface.co/llmware/slim-emotions-tool", - "custom_model_files": ["slim-emotions.gguf"], "custom_model_repo": "llmware/slim-emotions-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["emotions"], "fc_output_values": ["afraid", "anger", "angry", "annoyed", "anticipating", "anxious", "apprehensive", @@ -560,7 +558,6 @@ "prepared", "proud", "sad", "sadness", "sentimental", "surprise", "surprised", "terrified", "trusting"], "tokenizer": "llmware/slim-emotions", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -569,17 +566,16 @@ {"model_name": "slim-ratings-tool", "display_name": "slim-ratings-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-ratings.gguf", "gguf_repo": "llmware/slim-ratings-tool", "link": "https://huggingface.co/llmware/slim-ratings-tool", - "custom_model_files": ["slim-ratings.gguf"], "custom_model_repo": "llmware/slim-ratings-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["rating"], "fc_output_values": ["1", "2", "3", "4", "5"], "tokenizer": "llmware/slim-ratings", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -588,18 +584,18 @@ {"model_name": "slim-intent-tool", "display_name": "slim-intent-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-intent.gguf", "gguf_repo": "llmware/slim-intent-tool", "link": "https://huggingface.co/llmware/slim-intent-tool", - "custom_model_files": ["slim-intent.gguf"], "custom_model_repo": "llmware/slim-intent-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["intent"], "fc_output_values": ["account", "cancel", "complaint", "customer service", "delivery", "feedback", "invoice", "new account", "order", "payments", "refund", "shipping", "subscription", "terminate"], "tokenizer": "llmware/slim-intent", - "value_zone_markers": {"start": [6024, 6796, 3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -608,16 +604,16 @@ {"model_name": 
"slim-nli-tool", "display_name": "slim-nli-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-nli.gguf", "gguf_repo": "llmware/slim-nli-tool", "link": "https://huggingface.co/llmware/slim-nli-tool", - "custom_model_files": ["slim-nli.gguf"], "custom_model_repo": "llmware/slim-nli-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["evidence"], "fc_output_values": ["supports", "neutral", "contradicts"], "tokenizer": "llmware/slim-nli", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [9996,5924,17821], "marker_token_lookup": {9996: "contradicts", 5924: "supports", 17821: "neutral"}, "function": ["classify"], @@ -626,71 +622,70 @@ {"model_name": "slim-topics-tool", "display_name": "slim-topics-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-topics.gguf", "gguf_repo": "llmware/slim-topics-tool", "link": "https://huggingface.co/llmware/slim-topics-tool", - "custom_model_files": ["slim-topics.gguf"], "custom_model_repo": "llmware/slim-topics-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["topics"], "fc_output_values": [], "tokenizer": "llmware/slim-topics", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, - {"model_name": "slim-tags-tool", "display_name": "slim-tags-tool", - "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", - "gguf_file": "slim-tags.gguf", - "gguf_repo": "llmware/slim-tags-tool", - "link": "https://huggingface.co/llmware/slim-tags-tool", - "custom_model_files": ["slim-tags.gguf"], "custom_model_repo": "llmware/slim-tags-tool", - "function_call": True, - "primary_keys": ["tags"], - "fc_output_values": [], - "tokenizer": "llmware/slim-tags", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, - "marker_tokens": [], - "marker_token_lookup": {}, - "function": ["classify"], - "snapshot": True}, + {"model_name": "slim-tags-tool", "display_name": "slim-tags-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-tags.gguf", "gguf_repo": "llmware/slim-tags-tool", + "link": "https://huggingface.co/llmware/slim-tags-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", + "function_call": True, + "primary_keys": ["tags"], + "fc_output_values": [], + "tokenizer": "llmware/slim-tags", + "marker_tokens": [], + "marker_token_lookup": {}, 
+ "function": ["classify"], + "snapshot": True}, {"model_name": "slim-sql-tool", "display_name": "slim-sql-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-sql.gguf", "gguf_repo": "llmware/slim-sql-tool", "fc_output_values": [], "link": "https://huggingface.co/llmware/slim-sql-tool", - "custom_model_files": ["slim-sql.gguf"], "custom_model_repo": "llmware/slim-sql-tool", + "custom_model_files": [], "custom_model_repo": "", "tokenizer": "llmware/slim-sql-1b-v0", "snapshot": True}, {"model_name": "bling-answer-tool", "display_name": "bling-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "bling-answer.gguf", "gguf_repo": "llmware/bling-answer-tool", "link": "https://huggingface.co/llmware/bling-answer-tool", - "custom_model_files": ["bling-answer.gguf"], "custom_model_repo": "llmware/bling-answer-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", "tokenizer": "llmware/bling-tiny-llama-1b-v0", "snapshot": True}, {"model_name": "slim-category-tool", "display_name": "slim-category-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.3, "sample_default": False, "trailing_space": "", "gguf_file": "slim-category.gguf", "gguf_repo": "llmware/slim-category-tool", "link": "https://huggingface.co/llmware/slim-category-tool", - "custom_model_files": ["slim-category.gguf"], "custom_model_repo": "llmware/slim-category-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["category"], "fc_output_values": ["analyst", "announcements", "bonds", "business", "central bank", "commentary", @@ -699,7 +694,6 @@ "markets", "mergers and acquisitions", "opinion", "politics", "public markets", "science", "sports", "stocks", "tech", "world"], "tokenizer": "llmware/slim-category", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -710,17 +704,17 @@ {"model_name": "llmware/slim-intent", "display_name": "slim-intent-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-intent", "hf_repo": "llmware/slim-intent", "custom_model_files": [""], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["intent"], "fc_output_values": ["account", "cancel", "complaint", "customer service", "delivery", "feedback", "invoice", "new account", "order", 
"payments", "refund", "shipping", "subscription", "terminate"], "function": ["classify"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, }, @@ -728,14 +722,14 @@ {"model_name": "llmware/slim-sentiment", "display_name": "slim-sentiment-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-sentiment", "hf_repo": "llmware/slim-sentiment", "custom_model_files": [""], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["sentiment"], "fc_output_values": ["positive", "neutral", "negative"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, "function": ["classify"]}, @@ -743,10 +737,11 @@ {"model_name": "llmware/slim-emotions", "display_name": "slim-emotions-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-emotions", "hf_repo": "llmware/slim-emotions", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["emotions"], "fc_output_values": ["afraid", "anger", "angry", "annoyed", "anticipating", "anxious", "apprehensive", @@ -755,7 +750,6 @@ "hopeful", "impressed", "jealous", "joy", "joyful", "lonely", "love", "nostalgic", "prepared", "proud", "sad", "sadness", "sentimental", "surprise", "surprised", "terrified", "trusting"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, "function": ["classify"]}, @@ -763,14 +757,14 @@ {"model_name": "llmware/slim-ner", "display_name": "slim-ner-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-ner", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "hf_repo": "llmware/slim-ner", "function_call": True, "primary_keys": ["person", "organization", "place", "misc"], "fc_output_values": [], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -778,14 +772,14 @@ {"model_name": "llmware/slim-nli", "display_name": 
"slim-nli-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-nli", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", "hf_repo": "llmware/slim-nli", + "output_type": "dict", "function_call": True, "primary_keys": ["evidence"], "fc_output_values": ["supports", "neutral", "contradicts"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -793,14 +787,14 @@ {"model_name": "llmware/slim-ratings", "display_name": "slim-ratings-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-ratings", "hf_repo": "llmware/slim-ratings", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["rating"], "fc_output_values": ["1", "2", "3", "4", "5"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -808,9 +802,10 @@ {"model_name": "llmware/slim-category", "display_name": "slim-category-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-category", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "hf_repo": "llmware/slim-category", "function_call": True, "primary_keys": ["category"], @@ -819,7 +814,6 @@ "financials", "health", "human resources", "legal and regulation", "macroeconomics", "markets", "mergers and acquisitions", "opinion", "politics", "public markets", "science", "sports", "stocks", "tech", "world"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -827,12 +821,12 @@ {"model_name": "llmware/slim-tags", "display_name": "slim-tags-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-tags", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], 
"custom_model_repo": "", "hf_repo": "llmware/slim-tags", + "outout_type": "dict", "function_call": True, - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["tags"], @@ -842,12 +836,12 @@ {"model_name": "llmware/slim-topics", "display_name": "slim-topics-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0,"sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-topics", "hf_repo": "llmware/slim-topics", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, - "value_zone_markers": {"start": [6024, 6796, 3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["topics"], @@ -858,7 +852,8 @@ {"model_name": "llmware/slim-sql-1b-v0", "display_name": "slim-sql-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/slim-sql-1b-v0", + "temperature": 0.0, "sample_default": False, + "trailing_space": "", "link": "https://huggingface.co/llmware/slim-sql-1b-v0", "custom_model_files": [], "custom_model_repo": "", "hf_repo": "llmware/slim-sql-1b-v0", #TODO: assess how to handle SQL models with function call parameters @@ -866,15 +861,137 @@ "fc_output_values": [], "primary_keys": ["sql"], "function": ["sql"]}, - {"model_name": "bling-stablelm-3b-tool", "display_name": "llmware/bling-stablelm-3b-gguf", - "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", - "gguf_file": "bling-stablelm.gguf", - "gguf_repo": "llmware/bling-stablelm-3b-gguf", - "snapshot": True, - "link": "https://huggingface.co/llmware/bling-stablelm-3b-gguf", - "custom_model_files": ["bling-stablelm.gguf"], "custom_model_repo": "llmware/bling-stablelm-3b-gguf"}, + {"model_name": "bling-stablelm-3b-tool", "display_name": "llmware/bling-stablelm-3b-gguf", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "bling-stablelm.gguf", + "gguf_repo": "llmware/bling-stablelm-3b-gguf", "snapshot": True, + "link": "https://huggingface.co/llmware/bling-stablelm-3b-gguf", + "custom_model_files": [], "custom_model_repo": ""}, + + {"model_name": "slim-xsum", "display_name": "llmware/slim-xsum", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-xsum", "hf_repo": "llmware/slim-xsum", + 
"custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["xsum"], "fc_output_values": [], + "function": ["classify"]}, + + {"model_name": "slim-xsum-tool", "display_name": "slim-xsum-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-xsum.gguf", "gguf_repo": "llmware/slim-xsum-tool", + "link": "https://huggingface.co/llmware/slim-xsum-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["xsum"], "fc_output_values": [], + "tokenizer": "llmware/slim-xsum", + "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, + + {"model_name": "slim-extract", "display_name": "llmware/slim-extract", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-extract", "hf_repo": "llmware/slim-extract", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["key data points"], "fc_output_values": [], + "function": ["extract"]}, + + {"model_name": "slim-extract-tool", "display_name": "slim-extract-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-extract.gguf", "gguf_repo": "llmware/slim-extract-tool", + "link": "https://huggingface.co/llmware/slim-extract-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": ["key data points"], "fc_output_values": [], + "tokenizer": "llmware/slim-extract", "marker_tokens": [], + "marker_token_lookup": {}, "function": ["extract"], "snapshot": True}, + + {"model_name": "slim-boolean", "display_name": "llmware/slim-boolean", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-boolean", "hf_repo": "llmware/slim-boolean", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [2369,9820], "marker_token_lookup": {2369: "no", 9820: "yes"}, + "primary_keys": [], "fc_output_values": [], + "function": ["boolean"]}, + + {"model_name": "slim-boolean-tool", "display_name": "slim-boolean-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": 
"slim-boolean.gguf", "gguf_repo": "llmware/slim-boolean-tool", + "link": "https://huggingface.co/llmware/slim-boolean-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": [], "fc_output_values": [], + "tokenizer": "llmware/slim-boolean", + "marker_tokens": [2369,9820], "marker_token_lookup": {2369: "no", 9820: "yes"}, + "function": ["boolean"], "snapshot": True}, + + {"model_name": "slim-sa-ner", "display_name": "llmware/slim-sa-ner", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-sa-ner", "hf_repo": "llmware/slim-sa-ner", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, + "primary_keys": ["sentiment, person, organization, place"], "fc_output_values": [], + "function": ["classify"]}, + + {"model_name": "slim-sa-ner-tool", "display_name": "slim-sa-ner-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "sa-ner.gguf", "gguf_repo": "llmware/slim-sa-ner-tool", + "link": "https://huggingface.co/llmware/slim-sa-ner-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": ["sentiment, person, organization, place"], "fc_output_values": [], + "tokenizer": "llmware/slim-sa-ner", "marker_tokens": [], + "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, + + {"model_name": "slim-tags-3b", "display_name": "llmware/slim-tags-3b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-tags-3b", "hf_repo": "llmware/slim-tags-3b", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, + "primary_keys": ["tags"], "fc_output_values": [], + "function": ["classify"]}, + + {"model_name": "slim-tags-3b-tool", "display_name": "slim-tags-3b-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-tags-3b.gguf", "gguf_repo": "llmware/slim-tags-3b-tool", + "link": "https://huggingface.co/llmware/slim-tags-3b-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": ["tags"], "fc_output_values": [], + "tokenizer": "llmware/slim-tags-3b", "marker_tokens": [], + "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, + + {"model_name": "slim-summary", "display_name": "llmware/slim-summary", + "model_family": "HFGenerativeModel", 
"model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-summary", "hf_repo": "llmware/slim-summary", + "custom_model_files": [], "custom_model_repo": "", "output_type": "list", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["key points (3)"], "fc_output_values": [], + "function": ["summarize"]}, + + {"model_name": "slim-summary-tool", "display_name": "slim-summary-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-summarize.gguf", "gguf_repo": "llmware/slim-summary-tool", + "link": "https://huggingface.co/llmware/slim-summary-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "list", + "function_call": True, "primary_keys": ["key points (3)"], "fc_output_values": [], + "tokenizer": "llmware/slim-summary", + "marker_tokens": [], "marker_token_lookup": {}, "function": ["summarize"], "snapshot": True} ] diff --git a/llmware/models.py b/llmware/models.py index 02f81238..2d97a2d9 100644 --- a/llmware/models.py +++ b/llmware/models.py @@ -87,7 +87,8 @@ class _ModelRegistry: # list of function calling classifier tools llm_fx_tools = ["ner", "sentiment", "topics", "ratings", "emotions", "nli", - "intent", "sql", "answer", "category", "tags"] + "intent", "sql", "answer", "category", "tags", "summary", "xsum", "extract", + "boolean", "sa-ner","tags-3b"] llm_fx_tools_map = {"ner": "slim-ner-tool", "sentiment": "slim-sentiment-tool", @@ -99,7 +100,15 @@ class _ModelRegistry: "tags": "slim-tags-tool", "answer": "bling-answer-tool", "category": "slim-category-tool", - "intent": "slim-intent-tool"} + "intent": "slim-intent-tool", + # new tools added + "summary": "slim-summary-tool", + "xsum": "slim-xsum-tool", + "extract": "slim-extract-tool", + "boolean": "slim-boolean-tool", + "sa-ner": "slim-sa-ner-tool", + "tags-3b": "slim-tags-3b-tool" + } @classmethod def get_model_list(cls): """ List current view of registered models """ @@ -255,6 +264,7 @@ def __init__(self): self.sample = True self.max_output = 100 self.get_logits = False + self.force_reload = False def pull_latest_manifest(self): """ Not implemented currently """ @@ -496,6 +506,23 @@ def locate_and_retrieve_model_bits (self, model_card, api_key=None): model_folder_name = model_card["model_name"] + # new insert - check if custom_model_repo + if "custom_model_repo" in model_card: + if model_card["custom_model_repo"]: + if os.path.exists(model_card["custom_model_repo"]): + if "custom_model_files" in model_card: + if model_card["custom_model_files"]: + if len(model_card["custom_model_files"]) > 0: + if os.path.exists(os.path.join(model_card["custom_model_repo"], + model_card["custom_model_files"][0])): + # confirmed that custom path and at least model artifact exist + print("update: returning custom model path: ", model_card["custom_model_repo"], + model_card["custom_model_files"]) + + return model_card["custom_model_repo"] + else: + raise ModelNotFoundException(f"Custom model repo path - {model_card['custom_model_repo']}") + if model_card["model_family"] == "GGUFGenerativeModel": 
model_folder_name = model_folder_name.split("/")[-1] @@ -504,7 +531,7 @@ def locate_and_retrieve_model_bits (self, model_card, api_key=None): model_location = os.path.join(LLMWareConfig.get_model_repo_path(), model_folder_name) - if os.path.exists(model_location): + if os.path.exists(model_location) and not self.force_reload: model_parts_in_folder = os.listdir(model_location) if len(model_parts_in_folder) > 0: @@ -648,7 +675,7 @@ def _instantiate_model_class_from_string(self, model_class, model_name, model_ca return my_model def load_model (self, selected_model, api_key=None, use_gpu=True, sample=True,get_logits=False, - max_output=100, temperature=-99): + max_output=100, temperature=-99, force_reload=False): """ Main method for loading and fully instantiating a model based solely on the model's name """ @@ -657,6 +684,7 @@ self.sample=sample self.max_output=max_output self.get_logits=get_logits + self.force_reload = force_reload # note: temperature set by default at -99, which is a dummy value that is over-ridden by the temperature # in the model card. This temperature will only be used if explicitly set by the user at value != -99 @@ -1024,8 +1052,13 @@ def tool_test_run(self, model_name, api_key=None, verbose=False, if "conclusion" in entries: text = "Evidence: " + text + "\nConclusion: " + entries["conclusion"] - # note: testing with temp & max_output - response = model.function_call(text) + # special case for boolean (question = params) + if "question" in entries: + params = entries["question"] + " (explain)" + response = model.function_call(text, params=[params]) + else: + # general case - use default params and function from model card + response = model.function_call(text) # if verbose: print(f"\nupdate: context - test - {i} - {text}") @@ -1110,14 +1143,6 @@ def logit_analysis(self, response, model_card, hf_tokenizer_name,api_key=None): """ Analyzes logits from llm response """ - # value zone markers - vz_start = [] - vz_stop = [] - - if "value_zone_markers" in model_card: - vz_start = model_card["value_zone_markers"]["start"] - vz_stop = model_card["value_zone_markers"]["stop"] - # marker tokens for sentiment analysis marker_tokens = [] marker_token_lookup = {} @@ -1134,6 +1159,15 @@ # hf tokenizer name tokenizer = AutoTokenizer.from_pretrained(hf_tokenizer_name, token=api_key) + try: + # pull bos attributes from tokenizer + bos_token_id = tokenizer.bos_token_id + bos_str = tokenizer.bos_token + except: + # unexpected - but if fail, then take llama defaults + bos_token_id = 1 + bos_str = "<s>" + ryg_string = "" token_probs = [] @@ -1143,7 +1177,8 @@ for i, toks in enumerate(response["output_tokens"]): - if toks in vz_stop: + # change - look directly for ']' in tokenized output + if "]" in tokenizer.decode(toks): vz_capture_on = False if toks in marker_tokens: @@ -1172,7 +1207,8 @@ vz_choices.append(new_entry) - if toks in vz_start: + # change - look for "[" directly in token decoded output + if "[" in tokenizer.decode(toks): vz_capture_on = True if toks == 2: @@ -1185,10 +1221,10 @@ token_probs.append(logits[i][x][1]) if logits[i][x][1] > 0.70: - ryg_string += green + tokenizer.decode([1, logits[i][x][0]]) + ryg_string += green + tokenizer.decode([bos_token_id, logits[i][x][0]]) if 0.3 <= logits[i][x][1] <= 0.70: - ryg_string += yellow + tokenizer.decode([1, logits[i][x][0]]) + ryg_string += yellow + tokenizer.decode([bos_token_id, logits[i][x][0]]) new_entry = {} for y in range(0, 3): @@ -1199,7 +1235,7 @@ low_confidence_choices.append(new_entry) if logits[i][x][1] < 0.3: - ryg_string += red + tokenizer.decode([1, logits[i][x][0]]) + ryg_string += red + tokenizer.decode([bos_token_id, logits[i][x][0]]) new_entry = {} for y in range(0, 3): @@ -1209,7 +1245,8 @@ low_confidence_choices.append(new_entry) - ryg_string = ryg_string.replace("<s>", "") + # removing hard-coded "<s>" + ryg_string = ryg_string.replace(bos_str, "") logit_analysis = {"ryg_string": ryg_string + color_reset, "choices": vz_choices, "marker_tokens": marker_token_probs, @@ -1272,7 +1309,7 @@ def remediate_function_call_string(self,input_string, dedupe_values=True): # if very short output, then can not remediate - assume that a bigger problem happened with the inference if len(input_string) < starter: - print("update: llm response very short - could not remediate and convert to dict or list") + # print("update: llm response very short - could not remediate and convert to dict or list") return "string", input_string start = -1 @@ -1292,7 +1329,7 @@ list_start = x if start < 0 and list_start < 0: - print("update: remediation not successful - could not find a start marker for dictionary or list") + # print("update: remediation not successful - could not find a start marker for dictionary or list") return "string", input_string # based on the start marker, determine the target output type @@ -1301,6 +1338,7 @@ list_type = True key_or_value = "value" response_type = "list" + start = list_start-1 else: # try to build the string as a dictionary output list_type = False @@ -1314,6 +1352,8 @@ output_list = [] current_key = "" + # print("***test*** - remediation - input string - ", input_string) + for y in range(start + 1, len(input_string)): # note: ASCII ORD conversion - 58 - ':' | 91 - '[' | 93 - ']' | 44 - ',' @@ -1333,28 +1373,45 @@ # string markers of ' and " if ord(input_string[counter]) in [34, 39]: - if not string_on: - string_on = True - key_tmp = "" - else: - # end of string token - string_on = False + # insert new check if ' followed by 's' + exception_skip = False + if len(input_string) > counter+1: + if ord(input_string[counter+1]) in [115]: + exception_skip = True + # counter += 1 + # end - new check - if len(key_tmp) > 0: + if not exception_skip: - if not list_type: - if key_or_value == "key": - keys.append(key_tmp) - current_key = key_tmp - output_dict.update({current_key: []}) + if not string_on: + string_on = True + key_tmp = "" + + else: + # end of string token + string_on = False + + if len(key_tmp) > 0: + + if not list_type: + if key_or_value == "key": + keys.append(key_tmp) + current_key = key_tmp + output_dict.update({current_key: []}) + + else: + values.append(key_tmp) + if current_key in output_dict: + output_dict[current_key].append(key_tmp) +
else: + logging.warning("update: remediation - could not find key-value to correct - output " + "may be missing certain content in structured output.") + + key_tmp = "" else: + output_list.append(key_tmp) values.append(key_tmp) - output_dict[current_key].append(key_tmp) - key_tmp = "" - else: - output_list.append(key_tmp) - values.append(key_tmp) - key_tmp = "" + key_tmp = "" if ord(input_string[counter]) == 58: @@ -1382,9 +1439,175 @@ else: # remediation successful in converting to list output if dedupe_values: - output_list = list(set(output_list)) + dd_output = [] + for elements in output_list: + if elements not in dd_output: + dd_output.append(elements) + + # not using set because it can change the order of the list from output + # output_list = list(set(output_list)) + + output_list = dd_output + return response_type, output_list + def analyze_sampling(self,response): + + """ Analyzes an llm response output dictionary and produces a 'sampling_stats' dictionary to provide + details on the effects, if any, of sampling in the output generation. """ + + sampling_stats = {} + + if "logits" not in response or "output_tokens" not in response: + logging.warning("warning: function analyze_sampling requires a response dictionary with 'logits' key - " + "not found in the current response provided. Set the model parameters to 'get_logits=True' " + "for the function call to provide logits.") + return sampling_stats + + logits = response["logits"] + output_tokens = response["output_tokens"] + + not_top_selected = 0 + top_token_not_used = [] + + if len(output_tokens) == 0: + return sampling_stats + + for x in range(0, len(output_tokens)): + + top_selected = True + + if output_tokens[x] != logits[x][0][0] and x > 0: + top_selected = False + top_token_not_used.append((x, output_tokens[x], logits[x])) + + if not top_selected and x > 0: + not_top_selected += 1 + + tokens_considered = len(output_tokens) - 1 + if tokens_considered > 0: + percent_top_token = (tokens_considered - not_top_selected) / tokens_considered + else: + percent_top_token = 0.0 + + # sampling_stats added to the output dictionary + sampling_stats.update({"total_output_tokens": len(output_tokens), + "percent_top_token": round(percent_top_token, 3), + "not_top_tokens": top_token_not_used}) + + return sampling_stats + + def get_fx_scores(self,response, hf_tokenizer_name, top_choices=3, logit_count=1, api_key=None): + + """ Provides useful metrics and scores derived from analyzing the logits and output tokens from a function call + llm response - currently only supported for HFGenerative and GGUFGenerative models. + + Inputs: + -- llm response dictionary, including logits and output tokens + -- hf_tokenizer_name for the model, which will be used to decode output tokens, logits and identify key + 'value zone' markers for the output response, e.g., identify list boundaries '[' and ']' + -- top_choices - number of candidates to consider in each logit, e.g., top 3 choices considered + -- logit_count - number of tokens to consider in the value zone, whether the first only, or more + -- api_key - optional, if tokenizer in private repository requiring an api key + + Output (dictionary): + -- for each key in the output response, there is a list of the candidate logits in the value zone associated + with that key - the list will be the length of the logit count requested + -- a sampling_stats key will also be produced that will provide summary data on the number of 'value zone' + tokens, the percentage taken from the top output logit candidate and a list of the 'sampled', e.g., + 'not top' logits taken + """ + + # output is a dict of dict + output = {} + + if "logits" not in response or "output_tokens" not in response: + logging.warning("warning: function get_fx_scores requires a response dictionary with 'logits' key - " + "not found in the current response provided. Set the model parameters to 'get_logits=True' " + "for the function call to provide logits.") + return output + + logits = response["logits"] + + keys_list = [] + llm_response = response["llm_response"] + + if isinstance(llm_response, dict): + for key, value in llm_response.items(): + keys_list.append(key) + elif isinstance(llm_response, list): + keys_list.append("llm_response") + else: + keys_list.append("llm_response") + + # hf tokenizer name + try: + from transformers import AutoTokenizer + except ImportError: + raise DependencyNotInstalledException("transformers") + + tokenizer = AutoTokenizer.from_pretrained(hf_tokenizer_name, token=api_key) + + vz_choices = [] + vz_capture_on = False + key_counter = 0 + + min_threshold = 0.005 + vz_logits = 0 + vz_top_logits = 0 + top_token_not_used = [] + + for i, toks in enumerate(response["output_tokens"]): + + decoded = tokenizer.decode(toks) + + if "]" in decoded: + vz_capture_on = False + if vz_choices: + output.update({keys_list[key_counter]: vz_choices}) + key_counter += 1 + vz_choices = [] + + if vz_capture_on: + + new_entry = {} + if toks == logits[i][0][0]: + vz_top_logits += 1 + else: + # the output token does not correspond to the logit with the highest score, so there was a + # 'sampling' effect to this generation - adding this token and corresponding logit to be saved + # and provided as output in 'sampling_stats' + # print("no match: ", i, tokenizer.decode(toks), tokenizer.decode(logits[i][0][0]),toks, logits[i]) + top_token_not_used.append((i, toks, logits[i])) + + vz_logits += 1 + + for x in range(0, top_choices): + + if logits[i][x][1] >= min_threshold: + new_entry.update({tokenizer.decode(logits[i][x][0]): round(logits[i][x][1], 3)}) + + if len(vz_choices) < logit_count: + vz_choices.append(new_entry) + + if "[" in decoded: + vz_capture_on = True + vz_choices = [] + + # share of value-zone tokens that matched the top logit candidate + if vz_logits > 0: + top_token_in_value_zone = round(vz_top_logits / vz_logits, 2) + else: + top_token_in_value_zone = 0.0 + + # sampling_stats added to the output dictionary + output.update({"sampling_stats": {"total_vz_tokens": vz_logits, + "percent_top_token": top_token_in_value_zone, + "not_top_tokens": top_token_not_used} + }) + + return output + + class PromptCatalog: """ PromptCatalog manages prompt styles and prompt
wrappers. """ @@ -3895,6 +4118,7 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo if self.model_card: if "hf_repo" in self.model_card: hf_repo_name = self.model_card["hf_repo"] + self.hf_tokenizer_name = hf_repo_name if api_key: if torch.cuda.is_available(): @@ -4428,7 +4652,8 @@ def register_top_logits(self, next_token_logit): top_logits = [] # by default, self.top_logit_count = 10, will get the top 10 highest values in logit output for x in range(0, self.top_logit_count): - pair = (sm_args_sorted[logit_size - x - 1], sm_sorted[logit_size - x - 1]) + # experiment - rounding the long float number + pair = (sm_args_sorted[logit_size - x - 1], round(sm_sorted[logit_size - x - 1],3)) top_logits.append(pair) self.logits_record.append(top_logits) @@ -5337,7 +5562,8 @@ def sample(self, idx=0, logits_array=None): nl_logit = logits_array[nl_token] - if self.penalty_last_n > 0: + # note: important to skip this if use_sampling is False + if self.penalty_last_n > 0 and self.use_sampling: self._lib.llama_sample_repetition_penalties(self._ctx.ctx, ctypes.byref(token_data_array.candidates), @@ -5447,7 +5673,8 @@ def register_top_logits(self): top_logits = [] for x in range(0,self.top_logit_count): - pair = (sm_args_sorted[logit_size-x-1],sm_sorted[logit_size-x-1]) + # experiment - try rounding the float number + pair = (sm_args_sorted[logit_size-x-1],round(sm_sorted[logit_size-x-1],3)) top_logits.append(pair) self.logits_record.append(top_logits)
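With get_logits=True now the default for tools, the two new analysis methods above can inspect how confident a slim model was in its structured output. Below is a minimal sketch, not part of the patch, assuming these methods live on ModelCatalog (as the surrounding methods in models.py suggest) and that slim-sentiment-tool and its llmware/slim-sentiment tokenizer are available; the response keys ('llm_response', 'logits', 'output_tokens') are taken from the code above.

# illustrative sketch - not part of the patch
from llmware.models import ModelCatalog

catalog = ModelCatalog()

# get_logits=True is required for the logit-based helpers to have input
model = catalog.load_model("slim-sentiment-tool", sample=False, get_logits=True, temperature=0.0)

response = model.function_call("The launch was a complete disaster.")

# per-key 'value zone' logit candidates, plus a 'sampling_stats' summary
fx_scores = catalog.get_fx_scores(response, "llmware/slim-sentiment", top_choices=3, logit_count=1)

# generation-wide view of how often the top logit token was actually emitted
sampling_stats = catalog.analyze_sampling(response)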
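The revised logit_analysis follows the same pattern: value-zone boundaries are now found by decoding '[' and ']' directly rather than through the removed value_zone_markers, and the BOS token comes from the tokenizer itself. Continuing the sketch above; lookup_model_card is assumed to be the standard ModelCatalog accessor for a registered model card.

# continuing the sketch - 'catalog' and 'response' as defined above
model_card = catalog.lookup_model_card("slim-sentiment-tool")
analysis = catalog.logit_analysis(response, model_card, "llmware/slim-sentiment")

print(analysis["ryg_string"])     # red/yellow/green confidence-colored reconstruction of the output
print(analysis["marker_tokens"])  # probabilities for the positive/negative/neutral marker tokens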