From bb7ef7823d048e5b6df44c6221d96fcb9390e0d5 Mon Sep 17 00:00:00 2001 From: DARREN OBERST Date: Fri, 22 Mar 2024 03:46:40 -0400 Subject: [PATCH] adding new slim models to catalog and supporting methods --- llmware/agents.py | 95 +++++++++++- llmware/model_configs.py | 315 +++++++++++++++++++++++++++------------ llmware/models.py | 315 +++++++++++++++++++++++++++++++++------ 3 files changed, 575 insertions(+), 150 deletions(-) diff --git a/llmware/agents.py b/llmware/agents.py index b55617db..bff633ee 100644 --- a/llmware/agents.py +++ b/llmware/agents.py @@ -393,8 +393,8 @@ def analyze_responses(self, key,value): def load_tool(self, tool_type, # new options added - use_gpu=True, sample=True, get_logits=False, - max_output=100, temperature=-99): + use_gpu=True, sample=False, get_logits=True, + max_output=100, temperature=0.0): """ Loads a single tool """ @@ -442,11 +442,14 @@ def unload_tool(self, tool_type): self.write_to_journal(journal_update) model = getattr(self, tool_type + "_model") - model.unload_model() - delattr(self, tool_type + "_model") - setattr(self, tool_type + "_model", None) - gc.collect() + if model: + + model.unload_model() + + delattr(self, tool_type + "_model") + setattr(self, tool_type + "_model", None) + gc.collect() return 0 @@ -513,6 +516,10 @@ def exec_function_call(self, tool_type, text=None, function="classify", params=N dict_output = True self.report[work_iter] = self.report[work_iter] | response["llm_response"] + elif response["usage"]["type"] == "list" and tool_type == "summary": + dict_output = True + self.report[work_iter] = self.report[work_iter] | {"summary": response["llm_response"]} + else: logging.warning("update: could not automatically convert to dictionary - " "keeping as string output") @@ -745,6 +752,79 @@ def category(self, text=None, params=None): return self.exec_function_call("category", text=text, params=params) + def sa_ner(self, text=None, params=None): + + """ Generates a dictionary with keys corresponding to 'sentiment' and 'named entity recognition' (NER) + identifiers in the text, such as people, organizations, and places. """ + + if not params: + # default parameter key + params = ["sentiment, people, organization, place"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("sa-ner", text=text, params=params) + + def extract(self, text=None, params=None): + + """ Extract receives an input of a text passage and a custom parameter key, and generates a dictionary with + a key corresponding to the 'custom parameter' key and a list of values associated with that key, extracted from + the text passage. """ + + if not params: + # default parameter key + params = ["key points"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("extract", text=text, params=params) + + def xsum(self, text=None, params=None): + + """ XSum or 'extreme summarization' receives an input text passage, and returns a dictionary with an 'xsum' + key and a value of a list with one string element, with the string element consisting of a short phrase, + title, or headline that provides a concise summary of the text passage.
""" + + if not params: + # default parameter key + params = ["xsum"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("xsum", text=text, params=params) + + def summarize(self, text=None, params=None): + + """ Summarizes receives an input text passage, and optional parameters to guide the summarization, and + returns a list of summary points from the text. """ + + if not params: + # default parameter key + params = ["key points (3)"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("summary", text=text, params=params) + + def boolean(self, text=None, params=None): + + """ Boolean receives an input text passage, a yes/no question as its parameter, and then returns a + dictionary with two keys - 'answer' and 'explain' with the 'answer' providing a yes/no classification, and the + explanation providing text from the passage that was used as the basis for the classification. """ + + if not params: + #TODO: what is right way to handle - needs params + params = ["Is this true?"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("boolean", text=text, params=params) + def nli(self, text1, text2, params=None): """ Executes a natural language inference classification on a text, if passed directly, or will pull current @@ -1194,6 +1274,7 @@ def create_new_table(self, output, table_name): for i, entry in enumerate(header_row): col_name = re.sub("[\xfe\xff]","",entry) try: + #TODO: build more robust type checking, e.g., float/decimal/currency test_int = int(test_row[i]) type="integer" except: @@ -1264,7 +1345,7 @@ def create_new_table_from_csv(self,fp=None, fn=None, table_name=None): logging.info("update: table created - column names - %s ", column_names) else: - print("update: table exists - getting column names") + logging.info("update: table exists - getting column names") column_names = self.get_column_names(table_name) # insert records diff --git a/llmware/model_configs.py b/llmware/model_configs.py index d7086c0b..a6392cc1 100644 --- a/llmware/model_configs.py +++ b/llmware/model_configs.py @@ -202,10 +202,10 @@ # --this can be configured and over-ridden if you prefer to use the full 200K window {"model_name": 'claude-3-opus-20240229', "display_name": "Anthropic-Claude-3-Opus", "model_family": "ClaudeModel", - "model_category": "generative-api", "model_location": "api", "context_window": 8192}, + "model_category": "generative-api", "model_location": "api", "context_window": 8192}, {"model_name": 'claude-3-sonnet-20240229', "display_name": "Anthropic-Claude-3-Sonnet", "model_family": "ClaudeModel", - "model_category": "generative-api", "model_location": "api", "context_window": 8192}, + "model_category": "generative-api", "model_location": "api", "context_window": 8192}, {"model_name": 'claude-2.1', "display_name": "Anthropic Claude-2.1", "model_family": "ClaudeModel", "model_category": "generative-api", "model_location": "api", "context_window": 8192}, @@ -213,8 +213,9 @@ {"model_name": 'claude-2.0', "display_name": "Anthropic Claude-Claude2-.0", "model_family": "ClaudeModel", "model_category": "generative-api", "model_location": "api", "context_window": 8192}, - {"model_name": 'command-medium-nightly', "display_name": "Cohere Command Medium", "model_family": "CohereGenModel", + {"model_name": 'command-medium-nightly', "display_name": "Cohere Command Medium", "model_family": "CohereGenModel", "model_category": "generative-api","model_location": "api", "context_window": 2048}, + 
{"model_name": 'command-xlarge-nightly', "display_name": "Cohere Command XLarge", "model_family": "CohereGenModel", "model_category": "generative-api","model_location": "api", "context_window": 2048}, @@ -414,7 +415,7 @@ "gguf_file": "dragon-mistral-7b-q4_k_m.gguf", "gguf_repo": "llmware/dragon-mistral-7b-v0", "link": "https://huggingface.co/llmware/dragon-mistral-7b-v0", - "custom_model_files": ["dragon-mistral-7b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-mistral-7b-v0"}, + "custom_model_files": [], "custom_model_repo": ""}, # deprecated access to dragon-llama-7b-gguf -> replaced by dragon-llama-answer-tool {"model_name": "llmware/dragon-llama-7b-gguf", "display_name": "dragon-llama-7b-gguf", @@ -424,7 +425,7 @@ "gguf_file": "dragon-llama-7b-q4_k_m.gguf", "gguf_repo": "llmware/dragon-llama-7b-v0", "link": "https://huggingface.co/llmware/dragon-llama-7b-v0", - "custom_model_files": ["dragon-llama-7b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-llama-7b-v0"}, + "custom_model_files": [], "custom_model_repo": ""}, # deprecated access to dragon-yi-6b-gguf -> replaced by dragon-yi-answer-tool {"model_name": "llmware/dragon-yi-6b-gguf", "display_name": "dragon-yi-6b-gguf", @@ -434,7 +435,7 @@ "gguf_file": "dragon-yi-6b-q4_k_m.gguf", "gguf_repo": "llmware/dragon-yi-6b-v0", "link": "https://huggingface.co/llmware/dragon-yi-6b-v0", - "custom_model_files": ["dragon-yi-6b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-yi-6b-v0"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "dragon-yi-answer-tool", "display_name": "dragon-yi-6b-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -444,7 +445,7 @@ "gguf_repo": "llmware/dragon-yi-answer-tool", "snapshot": True, "link": "https://huggingface.co/llmware/dragon-yi-answer-tool", - "custom_model_files": ["dragon-yi.gguf"], "custom_model_repo": "llmware/dragon-yi-answer-tool"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "dragon-llama-answer-tool", "display_name": "dragon-llama-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -454,7 +455,7 @@ "gguf_repo": "llmware/dragon-llama-answer-tool", "snapshot": True, "link": "https://huggingface.co/llmware/dragon-llama-answer-tool", - "custom_model_files": ["dragon-llama.gguf"], "custom_model_repo": "llmware/dragon-llama-answer-tool"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "dragon-mistral-answer-tool", "display_name": "dragon-mistral-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -464,7 +465,7 @@ "gguf_repo": "llmware/dragon-mistral-answer-tool", "snapshot": True, "link": "https://huggingface.co/llmware/dragon-mistral-answer-tool", - "custom_model_files": ["dragon-mistral.gguf"], "custom_model_repo": "llmware/dragon-mistral-answer-tool"}, + "custom_model_files": [], "custom_model_repo": ""}, # selected top HF open source chat models - gguf {"model_name": "TheBloke/Llama-2-7B-Chat-GGUF", "display_name": "llama-2-7b-chat-gguf", @@ -474,7 +475,7 @@ "gguf_file": "llama-2-7b-chat.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["llama-2-7b-chat.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", 
"display_name": "openhermes-mistral-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -483,7 +484,7 @@ "gguf_file": "openhermes-2.5-mistral-7b.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["openhermes-2.5-mistral-7b.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "TheBloke/zephyr-7B-beta-GGUF", "display_name": "zephyr-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -492,7 +493,7 @@ "gguf_file": "zephyr-7b-beta.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["zephyr-7b-beta.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, + "custom_model_files": [], "custom_model_repo": ""}, {"model_name": "TheBloke/Starling-LM-7B-alpha-GGUF", "display_name": "starling-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", @@ -501,24 +502,22 @@ "gguf_file": "starling-lm-7b-alpha.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", - "custom_model_files": ["starling-lm-7b-alpha.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon" - }, + "custom_model_files": [], "custom_model_repo": ""}, # new slim models {"model_name": "slim-ner-tool", "display_name": "slim-ner-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-ner.gguf", "gguf_repo": "llmware/slim-ner-tool", "link": "https://huggingface.co/llmware/slim-ner-tool", - "custom_model_files": ["slim-ner.gguf"], "custom_model_repo": "llmware/slim-ner-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["people", "location", "organization", "misc"], "fc_output_values": [], "tokenizer": "llmware/slim-ner", - "value_zone_markers": {"start": [6024,6796, 3366], "stop": [2033,3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, @@ -526,17 +525,16 @@ {"model_name": "slim-sentiment-tool", "display_name": "slim-sentiment-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-sentiment.gguf", "gguf_repo": "llmware/slim-sentiment-tool", "link": "https://huggingface.co/llmware/slim-sentiment-tool", - "custom_model_files": ["slim-sentiment.gguf"], "custom_model_repo": "llmware/slim-sentiment-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["sentiment"], "fc_output_values": ["positive", "neutral", "negative"], "tokenizer": "llmware/slim-sentiment", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], 
"marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, "function": ["classify"], @@ -545,12 +543,12 @@ {"model_name": "slim-emotions-tool", "display_name": "slim-emotions-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-emotions.gguf", "gguf_repo": "llmware/slim-emotions-tool", "link": "https://huggingface.co/llmware/slim-emotions-tool", - "custom_model_files": ["slim-emotions.gguf"], "custom_model_repo": "llmware/slim-emotions-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["emotions"], "fc_output_values": ["afraid", "anger", "angry", "annoyed", "anticipating", "anxious", "apprehensive", @@ -560,7 +558,6 @@ "prepared", "proud", "sad", "sadness", "sentimental", "surprise", "surprised", "terrified", "trusting"], "tokenizer": "llmware/slim-emotions", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -569,17 +566,16 @@ {"model_name": "slim-ratings-tool", "display_name": "slim-ratings-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-ratings.gguf", "gguf_repo": "llmware/slim-ratings-tool", "link": "https://huggingface.co/llmware/slim-ratings-tool", - "custom_model_files": ["slim-ratings.gguf"], "custom_model_repo": "llmware/slim-ratings-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["rating"], "fc_output_values": ["1", "2", "3", "4", "5"], "tokenizer": "llmware/slim-ratings", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -588,18 +584,18 @@ {"model_name": "slim-intent-tool", "display_name": "slim-intent-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-intent.gguf", "gguf_repo": "llmware/slim-intent-tool", "link": "https://huggingface.co/llmware/slim-intent-tool", - "custom_model_files": ["slim-intent.gguf"], "custom_model_repo": "llmware/slim-intent-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["intent"], "fc_output_values": ["account", "cancel", "complaint", "customer service", "delivery", "feedback", "invoice", "new account", "order", "payments", "refund", "shipping", "subscription", "terminate"], "tokenizer": "llmware/slim-intent", - "value_zone_markers": {"start": [6024, 6796, 3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -608,16 +604,16 @@ {"model_name": 
"slim-nli-tool", "display_name": "slim-nli-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-nli.gguf", "gguf_repo": "llmware/slim-nli-tool", "link": "https://huggingface.co/llmware/slim-nli-tool", - "custom_model_files": ["slim-nli.gguf"], "custom_model_repo": "llmware/slim-nli-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["evidence"], "fc_output_values": ["supports", "neutral", "contradicts"], "tokenizer": "llmware/slim-nli", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [9996,5924,17821], "marker_token_lookup": {9996: "contradicts", 5924: "supports", 17821: "neutral"}, "function": ["classify"], @@ -626,71 +622,70 @@ {"model_name": "slim-topics-tool", "display_name": "slim-topics-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-topics.gguf", "gguf_repo": "llmware/slim-topics-tool", "link": "https://huggingface.co/llmware/slim-topics-tool", - "custom_model_files": ["slim-topics.gguf"], "custom_model_repo": "llmware/slim-topics-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["topics"], "fc_output_values": [], "tokenizer": "llmware/slim-topics", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, - {"model_name": "slim-tags-tool", "display_name": "slim-tags-tool", - "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", - "gguf_file": "slim-tags.gguf", - "gguf_repo": "llmware/slim-tags-tool", - "link": "https://huggingface.co/llmware/slim-tags-tool", - "custom_model_files": ["slim-tags.gguf"], "custom_model_repo": "llmware/slim-tags-tool", - "function_call": True, - "primary_keys": ["tags"], - "fc_output_values": [], - "tokenizer": "llmware/slim-tags", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, - "marker_tokens": [], - "marker_token_lookup": {}, - "function": ["classify"], - "snapshot": True}, + {"model_name": "slim-tags-tool", "display_name": "slim-tags-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-tags.gguf", "gguf_repo": "llmware/slim-tags-tool", + "link": "https://huggingface.co/llmware/slim-tags-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", + "function_call": True, + "primary_keys": ["tags"], + "fc_output_values": [], + "tokenizer": "llmware/slim-tags", + "marker_tokens": [], + "marker_token_lookup": {}, 
+ "function": ["classify"], + "snapshot": True}, {"model_name": "slim-sql-tool", "display_name": "slim-sql-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "slim-sql.gguf", "gguf_repo": "llmware/slim-sql-tool", "fc_output_values": [], "link": "https://huggingface.co/llmware/slim-sql-tool", - "custom_model_files": ["slim-sql.gguf"], "custom_model_repo": "llmware/slim-sql-tool", + "custom_model_files": [], "custom_model_repo": "", "tokenizer": "llmware/slim-sql-1b-v0", "snapshot": True}, {"model_name": "bling-answer-tool", "display_name": "bling-answer-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "bling-answer.gguf", "gguf_repo": "llmware/bling-answer-tool", "link": "https://huggingface.co/llmware/bling-answer-tool", - "custom_model_files": ["bling-answer.gguf"], "custom_model_repo": "llmware/bling-answer-tool", - # add function call parameters + "custom_model_files": [], "custom_model_repo": "", "tokenizer": "llmware/bling-tiny-llama-1b-v0", "snapshot": True}, {"model_name": "slim-category-tool", "display_name": "slim-category-tool", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", + "temperature": 0.3, "sample_default": False, "trailing_space": "", "gguf_file": "slim-category.gguf", "gguf_repo": "llmware/slim-category-tool", "link": "https://huggingface.co/llmware/slim-category-tool", - "custom_model_files": ["slim-category.gguf"], "custom_model_repo": "llmware/slim-category-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["category"], "fc_output_values": ["analyst", "announcements", "bonds", "business", "central bank", "commentary", @@ -699,7 +694,6 @@ "markets", "mergers and acquisitions", "opinion", "politics", "public markets", "science", "sports", "stocks", "tech", "world"], "tokenizer": "llmware/slim-category", - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], @@ -710,17 +704,17 @@ {"model_name": "llmware/slim-intent", "display_name": "slim-intent-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-intent", "hf_repo": "llmware/slim-intent", "custom_model_files": [""], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["intent"], "fc_output_values": ["account", "cancel", "complaint", "customer service", "delivery", "feedback", "invoice", "new account", "order", 
"payments", "refund", "shipping", "subscription", "terminate"], "function": ["classify"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, }, @@ -728,14 +722,14 @@ {"model_name": "llmware/slim-sentiment", "display_name": "slim-sentiment-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-sentiment", "hf_repo": "llmware/slim-sentiment", "custom_model_files": [""], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["sentiment"], "fc_output_values": ["positive", "neutral", "negative"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, "function": ["classify"]}, @@ -743,10 +737,11 @@ {"model_name": "llmware/slim-emotions", "display_name": "slim-emotions-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-emotions", "hf_repo": "llmware/slim-emotions", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["emotions"], "fc_output_values": ["afraid", "anger", "angry", "annoyed", "anticipating", "anxious", "apprehensive", @@ -755,7 +750,6 @@ "hopeful", "impressed", "jealous", "joy", "joyful", "lonely", "love", "nostalgic", "prepared", "proud", "sad", "sadness", "sentimental", "surprise", "surprised", "terrified", "trusting"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [1066, 22198, 17821], "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, "function": ["classify"]}, @@ -763,14 +757,14 @@ {"model_name": "llmware/slim-ner", "display_name": "slim-ner-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-ner", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "hf_repo": "llmware/slim-ner", "function_call": True, "primary_keys": ["person", "organization", "place", "misc"], "fc_output_values": [], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -778,14 +772,14 @@ {"model_name": "llmware/slim-nli", "display_name": 
"slim-nli-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-nli", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", "hf_repo": "llmware/slim-nli", + "output_type": "dict", "function_call": True, "primary_keys": ["evidence"], "fc_output_values": ["supports", "neutral", "contradicts"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -793,14 +787,14 @@ {"model_name": "llmware/slim-ratings", "display_name": "slim-ratings-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-ratings", "hf_repo": "llmware/slim-ratings", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["rating"], "fc_output_values": ["1", "2", "3", "4", "5"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -808,9 +802,10 @@ {"model_name": "llmware/slim-category", "display_name": "slim-category-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-category", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "hf_repo": "llmware/slim-category", "function_call": True, "primary_keys": ["category"], @@ -819,7 +814,6 @@ "financials", "health", "human resources", "legal and regulation", "macroeconomics", "markets", "mergers and acquisitions", "opinion", "politics", "public markets", "science", "sports", "stocks", "tech", "world"], - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"]}, @@ -827,12 +821,12 @@ {"model_name": "llmware/slim-tags", "display_name": "slim-tags-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-tags", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], 
"custom_model_repo": "", "hf_repo": "llmware/slim-tags", + "outout_type": "dict", "function_call": True, - "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["tags"], @@ -842,12 +836,12 @@ {"model_name": "llmware/slim-topics", "display_name": "slim-topics-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "temperature": 0.0,"sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", "link": "https://huggingface.co/llmware/slim-topics", "hf_repo": "llmware/slim-topics", - "custom_model_files": [""], "custom_model_repo": "", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, - "value_zone_markers": {"start": [6024, 6796, 3366], "stop": [2033, 3108]}, "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["topics"], @@ -858,7 +852,8 @@ {"model_name": "llmware/slim-sql-1b-v0", "display_name": "slim-sql-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/slim-sql-1b-v0", + "temperature": 0.0, "sample_default": False, + "trailing_space": "", "link": "https://huggingface.co/llmware/slim-sql-1b-v0", "custom_model_files": [], "custom_model_repo": "", "hf_repo": "llmware/slim-sql-1b-v0", #TODO: assess how to handle SQL models with function call parameters @@ -866,15 +861,137 @@ "fc_output_values": [], "primary_keys": ["sql"], "function": ["sql"]}, - {"model_name": "bling-stablelm-3b-tool", "display_name": "llmware/bling-stablelm-3b-gguf", - "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", - "temperature": 0.3, "trailing_space": "", - "gguf_file": "bling-stablelm.gguf", - "gguf_repo": "llmware/bling-stablelm-3b-gguf", - "snapshot": True, - "link": "https://huggingface.co/llmware/bling-stablelm-3b-gguf", - "custom_model_files": ["bling-stablelm.gguf"], "custom_model_repo": "llmware/bling-stablelm-3b-gguf"}, + {"model_name": "bling-stablelm-3b-tool", "display_name": "llmware/bling-stablelm-3b-gguf", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "bling-stablelm.gguf", + "gguf_repo": "llmware/bling-stablelm-3b-gguf", "snapshot": True, + "link": "https://huggingface.co/llmware/bling-stablelm-3b-gguf", + "custom_model_files": [], "custom_model_repo": ""}, + + {"model_name": "slim-xsum", "display_name": "llmware/slim-xsum", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-xsum", "hf_repo": "llmware/slim-xsum", + 
"custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["xsum"], "fc_output_values": [], + "function": ["classify"]}, + + {"model_name": "slim-xsum-tool", "display_name": "slim-xsum-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-xsum.gguf", "gguf_repo": "llmware/slim-xsum-tool", + "link": "https://huggingface.co/llmware/slim-xsum-tool", + "custom_model_files": [], "custom_model_repo": "", + "output_type": "dict", "function_call": True, "primary_keys": ["xsum"], "fc_output_values": [], + "tokenizer": "llmware/slim-xsum", + "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, + + {"model_name": "slim-extract", "display_name": "llmware/slim-extract", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-extract", "hf_repo": "llmware/slim-extract", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["key data points"], "fc_output_values": [], + "function": ["extract"]}, + + {"model_name": "slim-extract-tool", "display_name": "slim-extract-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-extract.gguf", "gguf_repo": "llmware/slim-extract-tool", + "link": "https://huggingface.co/llmware/slim-extract-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": ["key data points"], "fc_output_values": [], + "tokenizer": "llmware/slim-extract", "marker_tokens": [], + "marker_token_lookup": {}, "function": ["extract"], "snapshot": True}, + + {"model_name": "slim-boolean", "display_name": "llmware/slim-boolean", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-boolean", "hf_repo": "llmware/slim-boolean", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [2369,9820], "marker_token_lookup": {2369: "no", 9820: "yes"}, + "primary_keys": [], "fc_output_values": [], + "function": ["boolean"]}, + + {"model_name": "slim-boolean-tool", "display_name": "slim-boolean-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": 
"slim-boolean.gguf", "gguf_repo": "llmware/slim-boolean-tool", + "link": "https://huggingface.co/llmware/slim-boolean-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": [], "fc_output_values": [], + "tokenizer": "llmware/slim-boolean", + "marker_tokens": [2369,9820], "marker_token_lookup": {2369: "no", 9820: "yes"}, + "function": ["boolean"], "snapshot": True}, + + {"model_name": "slim-sa-ner", "display_name": "llmware/slim-sa-ner", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-sa-ner", "hf_repo": "llmware/slim-sa-ner", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, + "primary_keys": ["sentiment, person, organization, place"], "fc_output_values": [], + "function": ["classify"]}, + + {"model_name": "slim-sa-ner-tool", "display_name": "slim-sa-ner-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "sa-ner.gguf", "gguf_repo": "llmware/slim-sa-ner-tool", + "link": "https://huggingface.co/llmware/slim-sa-ner-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": ["sentiment, person, organization, place"], "fc_output_values": [], + "tokenizer": "llmware/slim-sa-ner", "marker_tokens": [], + "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, + + {"model_name": "slim-tags-3b", "display_name": "llmware/slim-tags-3b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-tags-3b", "hf_repo": "llmware/slim-tags-3b", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, + "primary_keys": ["tags"], "fc_output_values": [], + "function": ["classify"]}, + + {"model_name": "slim-tags-3b-tool", "display_name": "slim-tags-3b-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-tags-3b.gguf", "gguf_repo": "llmware/slim-tags-3b-tool", + "link": "https://huggingface.co/llmware/slim-tags-3b-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "dict", + "function_call": True, "primary_keys": ["tags"], "fc_output_values": [], + "tokenizer": "llmware/slim-tags-3b", "marker_tokens": [], + "marker_token_lookup": {}, "function": ["classify"], "snapshot": True}, + + {"model_name": "slim-summary", "display_name": "llmware/slim-summary", + "model_family": "HFGenerativeModel", 
"model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.0, "sample_default": False, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-summary", "hf_repo": "llmware/slim-summary", + "custom_model_files": [], "custom_model_repo": "", "output_type": "list", "function_call": True, + "marker_tokens": [], "marker_token_lookup": {}, "primary_keys": ["key points (3)"], "fc_output_values": [], + "function": ["summarize"]}, + + {"model_name": "slim-summary-tool", "display_name": "slim-summary-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", + "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, + "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "", + "gguf_file": "slim-summarize.gguf", "gguf_repo": "llmware/slim-summary-tool", + "link": "https://huggingface.co/llmware/slim-summary-tool", + "custom_model_files": [], "custom_model_repo": "", "output_type": "list", + "function_call": True, "primary_keys": ["key points (3)"], "fc_output_values": [], + "tokenizer": "llmware/slim-summary", + "marker_tokens": [], "marker_token_lookup": {}, "function": ["summarize"], "snapshot": True} ] diff --git a/llmware/models.py b/llmware/models.py index 02f81238..2d97a2d9 100644 --- a/llmware/models.py +++ b/llmware/models.py @@ -87,7 +87,8 @@ class _ModelRegistry: # list of function calling classifier tools llm_fx_tools = ["ner", "sentiment", "topics", "ratings", "emotions", "nli", - "intent", "sql", "answer", "category", "tags"] + "intent", "sql", "answer", "category", "tags", "summary", "xsum", "extract", + "boolean", "sa-ner","tags-3b"] llm_fx_tools_map = {"ner": "slim-ner-tool", "sentiment": "slim-sentiment-tool", @@ -99,7 +100,15 @@ class _ModelRegistry: "tags": "slim-tags-tool", "answer": "bling-answer-tool", "category": "slim-category-tool", - "intent": "slim-intent-tool"} + "intent": "slim-intent-tool", + # new tools added + "summary": "slim-summary-tool", + "xsum": "slim-xsum-tool", + "extract": "slim-extract-tool", + "boolean": "slim-boolean-tool", + "sa-ner": "slim-sa-ner-tool", + "tags-3b": "slim-tags-3b-tool" + } @classmethod def get_model_list(cls): """ List current view of registered models """ @@ -255,6 +264,7 @@ def __init__(self): self.sample = True self.max_output = 100 self.get_logits = False + self.force_reload = False def pull_latest_manifest(self): """ Not implemented currently """ @@ -496,6 +506,23 @@ def locate_and_retrieve_model_bits (self, model_card, api_key=None): model_folder_name = model_card["model_name"] + # new insert - check if custom_model_repo + if "custom_model_repo" in model_card: + if model_card["custom_model_repo"]: + if os.path.exists(model_card["custom_model_repo"]): + if "custom_model_files" in model_card: + if model_card["custom_model_files"]: + if len(model_card["custom_model_files"]) > 0: + if os.path.exists(os.path.join(model_card["custom_model_repo"], + model_card["custom_model_files"][0])): + # confirmed that custom path and at least model artifact exist + print("update: returning custom model path: ", model_card["custom_model_repo"], + model_card["custom_model_files"]) + + return model_card["custom_model_repo"] + else: + raise ModelNotFoundException(f"Custom model repo path - {model_card['custom_model_repo']}") + if model_card["model_family"] == "GGUFGenerativeModel": 
model_folder_name = model_folder_name.split("/")[-1] @@ -504,7 +531,7 @@ def locate_and_retrieve_model_bits (self, model_card, api_key=None): model_location = os.path.join(LLMWareConfig.get_model_repo_path(), model_folder_name) - if os.path.exists(model_location): + if os.path.exists(model_location) and not self.force_reload: model_parts_in_folder = os.listdir(model_location) if len(model_parts_in_folder) > 0: @@ -648,7 +675,7 @@ def _instantiate_model_class_from_string(self, model_class, model_name, model_ca return my_model def load_model (self, selected_model, api_key=None, use_gpu=True, sample=True,get_logits=False, - max_output=100, temperature=-99): + max_output=100, temperature=-99, force_reload=False): """ Main method for loading and fully instantiating a model based solely on the model's name """ @@ -657,6 +684,7 @@ self.sample=sample self.max_output=max_output self.get_logits=get_logits + self.force_reload = force_reload # note: temperature set by default at -99, which is a dummy value that is over-ridden by the temperature # in the model card. This temperature will only be used if explicitly set by the user at value != -99 @@ -1024,8 +1052,13 @@ def tool_test_run(self, model_name, api_key=None, verbose=False, if "conclusion" in entries: text = "Evidence: " + text + "\nConclusion: " + entries["conclusion"] - # note: testing with temp & max_output - response = model.function_call(text) + # special case for boolean (question = params) + if "question" in entries: + params = entries["question"] + " (explain)" + response = model.function_call(text, params=[params]) + else: + # general case - use default params and function from model card + response = model.function_call(text) # if verbose: print(f"\nupdate: context - test - {i} - {text}") @@ -1110,14 +1143,6 @@ def logit_analysis(self, response, model_card, hf_tokenizer_name,api_key=None): """ Analyzes logits from llm response """ - # value zone markers - vz_start = [] - vz_stop = [] - - if "value_zone_markers" in model_card: - vz_start = model_card["value_zone_markers"]["start"] - vz_stop = model_card["value_zone_markers"]["stop"] - # marker tokens for sentiment analysis marker_tokens = [] marker_token_lookup = {} @@ -1134,6 +1159,15 @@ # hf tokenizer name tokenizer = AutoTokenizer.from_pretrained(hf_tokenizer_name, token=api_key) + try: + # pull bos attributes from tokenizer + bos_token_id = tokenizer.bos_token_id + bos_str = tokenizer.bos_token + except: + # unexpected - but if fail, then take llama defaults + bos_token_id = 1 + bos_str = "<s>" + ryg_string = "" token_probs = [] @@ -1143,7 +1177,8 @@ for i, toks in enumerate(response["output_tokens"]): - if toks in vz_stop: + # change - look directly for ']' in tokenized output + if "]" in tokenizer.decode(toks): vz_capture_on = False if toks in marker_tokens: @@ -1172,7 +1207,8 @@ vz_choices.append(new_entry) - if toks in vz_start: + # change - look for "[" directly in token decoded output + if "[" in tokenizer.decode(toks): vz_capture_on = True if toks == 2: @@ -1185,10 +1221,10 @@ token_probs.append(logits[i][x][1]) if logits[i][x][1] > 0.70: - ryg_string += green + tokenizer.decode([1, logits[i][x][0]]) + ryg_string += green + tokenizer.decode([bos_token_id, logits[i][x][0]]) if 0.3 <= logits[i][x][1] <= 0.70: - ryg_string += yellow + tokenizer.decode([1, logits[i][x][0]]) + ryg_string += yellow + tokenizer.decode([bos_token_id, logits[i][x][0]]) new_entry = {} for y in range(0, 3): @@ -1199,7 +1235,7 @@ low_confidence_choices.append(new_entry) if logits[i][x][1] < 0.3: - ryg_string += red + tokenizer.decode([1, logits[i][x][0]]) + ryg_string += red + tokenizer.decode([bos_token_id, logits[i][x][0]]) new_entry = {} for y in range(0, 3): @@ -1209,7 +1245,8 @@ low_confidence_choices.append(new_entry) - ryg_string = ryg_string.replace("<s>", "") + # removing hard-coded "<s>" + ryg_string = ryg_string.replace(bos_str, "") logit_analysis = {"ryg_string": ryg_string + color_reset, "choices": vz_choices, "marker_tokens": marker_token_probs, @@ -1272,7 +1309,7 @@ def remediate_function_call_string(self,input_string, dedupe_values=True): # if very short output, then can not remediate - assume that a bigger problem happened with the inference if len(input_string) < starter: - print("update: llm response very short - could not remediate and convert to dict or list") + # print("update: llm response very short - could not remediate and convert to dict or list") return "string", input_string start = -1 @@ -1292,7 +1329,7 @@ list_start = x if start < 0 and list_start < 0: - print("update: remediation not successful - could not find a start marker for dictionary or list") + # print("update: remediation not successful - could not find a start marker for dictionary or list") return "string", input_string # based on the start marker, determine the target output type @@ -1301,6 +1338,7 @@ list_type = True key_or_value = "value" response_type = "list" + start = list_start-1 else: # try to build the string as a dictionary output list_type = False @@ -1314,6 +1352,8 @@ output_list = [] current_key = "" + # print("***test*** - remediation - input string - ", input_string) + for y in range(start + 1, len(input_string)): # note: ASCII ORD conversion - 58 - ':' | 91 - '[' | 93 - ']' | 44 - ',' @@ -1333,28 +1373,45 @@ # string markers of ' and " if ord(input_string[counter]) in [34, 39]: - if not string_on: - string_on = True - key_tmp = "" - else: - # end of string token - string_on = False + # insert new check if ' followed by 's' + exception_skip = False + if len(input_string) > counter+1: + if ord(input_string[counter+1]) in [115]: + exception_skip = True + # counter += 1 + # end - new check - if len(key_tmp) > 0: + if not exception_skip: - if not list_type: - if key_or_value == "key": - keys.append(key_tmp) - current_key = key_tmp - output_dict.update({current_key: []}) + if not string_on: + string_on = True + key_tmp = "" + + else: + # end of string token + string_on = False + + if len(key_tmp) > 0: + + if not list_type: + if key_or_value == "key": + keys.append(key_tmp) + current_key = key_tmp + output_dict.update({current_key: []}) + + else: + values.append(key_tmp) + if current_key in output_dict: + output_dict[current_key].append(key_tmp) +
else: + logging.warning("update: remediation - could not find key-value to correct - output " + "may be missing certain content in structured output.") + + key_tmp = "" else: + output_list.append(key_tmp) values.append(key_tmp) - output_dict[current_key].append(key_tmp) - key_tmp = "" - else: - output_list.append(key_tmp) - values.append(key_tmp) - key_tmp = "" + key_tmp = "" if ord(input_string[counter]) == 58: @@ -1382,9 +1439,175 @@ else: # remediation successful in converting to list output if dedupe_values: - output_list = list(set(output_list)) + dd_output = [] + for elements in output_list: + if elements not in dd_output: + dd_output.append(elements) + + # not using set because it can change the order of the list from output + # output_list = list(set(output_list)) + + output_list = dd_output + return response_type, output_list + def analyze_sampling(self,response): + + """ Analyzes an llm response output dictionary and produces a 'sampling_stats' dictionary to provide + details on the effects, if any, of sampling in the output generation. """ + + sampling_stats = {} + + if "logits" not in response or "output_tokens" not in response: + logging.warning("warning: function analyze_sampling requires a response dictionary with 'logits' key - " + "not found in the current response provided. Set the model parameters to 'get_logits=True' " + "for the function call to provide logits.") + return sampling_stats + + logits = response["logits"] + output_tokens = response["output_tokens"] + + not_top_selected = 0 + top_token_not_used = [] + + if len(output_tokens) == 0: + return sampling_stats + + for x in range(0, len(output_tokens)): + + top_selected = True + + if output_tokens[x] != logits[x][0][0] and x > 0: + top_selected = False + top_token_not_used.append((x, output_tokens[x], logits[x])) + + if not top_selected and x > 0: + not_top_selected += 1 + + tokens_considered = len(output_tokens) - 1 + if tokens_considered > 0: + percent_top_token = (tokens_considered - not_top_selected) / tokens_considered + else: + percent_top_token = 0.0 + + # sampling_stats added to the output dictionary + sampling_stats.update({"total_output_tokens": len(output_tokens), + "percent_top_token": round(percent_top_token, 3), + "not_top_tokens": top_token_not_used}) + + return sampling_stats + + def get_fx_scores(self,response, hf_tokenizer_name, top_choices=3, logit_count=1, api_key=None): + + """ Provides useful metrics and scores derived from analyzing the logits and output tokens from a function call + llm response - currently only supported for HFGenerative and GGUFGenerative models. + + Inputs: + -- llm response dictionary, including logits and output tokens + -- hf_tokenizer_name for the model, which will be used to decode output tokens, logits and identify key + 'value zone' markers for the output response, e.g., identify list boundaries '[' and ']' + -- top_choices - number of candidates to consider in each logit, e.g., top 3 choices considered + -- logit_count - number of tokens to consider in the value zone, whether the first only, or more + -- api_key - optional, if tokenizer in private repository requiring an api key + + Output (dictionary): + -- for each key in the output response, there is a list of the candidate logits in the value zone associated + with that key - the list will be the length of the logit count requested + -- a sampling_stats key will also be produced that will provide summary data on the number of 'value zone' + tokens, the percentage taken from the top output logit candidate and a list of the 'sampled', e.g., + 'not top' logits taken + """ + + # output is a dict of dict + output = {} + + if "logits" not in response or "output_tokens" not in response: + logging.warning("warning: function get_fx_scores requires a response dictionary with 'logits' key - " + "not found in the current response provided. Set the model parameters to 'get_logits=True' " + "for the function call to provide logits.") + return output + + logits = response["logits"] + + keys_list = [] + llm_response = response["llm_response"] + + if isinstance(llm_response, dict): + for key, value in llm_response.items(): + keys_list.append(key) + elif isinstance(llm_response, list): + keys_list.append("llm_response") + else: + keys_list.append("llm_response") + + # hf tokenizer name + try: + from transformers import AutoTokenizer + except ImportError: + raise DependencyNotInstalledException("transformers") + + tokenizer = AutoTokenizer.from_pretrained(hf_tokenizer_name, token=api_key) + + vz_choices = [] + vz_capture_on = False + key_counter = 0 + + min_threshold = 0.005 + vz_logits = 0 + vz_top_logits = 0 + top_token_not_used = [] + + for i, toks in enumerate(response["output_tokens"]): + + decoded = tokenizer.decode(toks) + + if "]" in decoded: + vz_capture_on = False + if vz_choices: + output.update({keys_list[key_counter]: vz_choices}) + key_counter += 1 + vz_choices = [] + + if vz_capture_on: + + new_entry = {} + if toks == logits[i][0][0]: + vz_top_logits += 1 + else: + # the output token does not correspond to the logit with the highest score, so there was a + # 'sampling' effect to this generation - adding this token and corresponding logit to be saved + # and provided as output in 'sampling_stats' + # print("no match: ", i, tokenizer.decode(toks), tokenizer.decode(logits[i][0][0]),toks, logits[i]) + top_token_not_used.append((i, toks, logits[i])) + + vz_logits += 1 + + for x in range(0, top_choices): + + if logits[i][x][1] >= min_threshold: + new_entry.update({tokenizer.decode(logits[i][x][0]): round(logits[i][x][1], 3)}) + + if len(vz_choices) < logit_count: + vz_choices.append(new_entry) + + if "[" in decoded: + vz_capture_on = True + vz_choices = [] + + # share of value-zone tokens that matched the top logit candidate + if vz_logits > 0: + top_token_in_value_zone = round(vz_top_logits / vz_logits, 2) + else: + top_token_in_value_zone = 0.0 + + # sampling_stats added to the output dictionary + output.update({"sampling_stats": {"total_vz_tokens": vz_logits, + "percent_top_token": top_token_in_value_zone, + "not_top_tokens": top_token_not_used} + }) + + return output + + class PromptCatalog: """ PromptCatalog manages prompt styles and prompt
wrappers. """ @@ -3895,6 +4118,7 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo if self.model_card: if "hf_repo" in self.model_card: hf_repo_name = self.model_card["hf_repo"] + self.hf_tokenizer_name = hf_repo_name if api_key: if torch.cuda.is_available(): @@ -4428,7 +4652,8 @@ def register_top_logits(self, next_token_logit): top_logits = [] # by default, self.top_logit_count = 10, will get the top 10 highest values in logit output for x in range(0, self.top_logit_count): - pair = (sm_args_sorted[logit_size - x - 1], sm_sorted[logit_size - x - 1]) + # experiment - rounding the long float number + pair = (sm_args_sorted[logit_size - x - 1], round(sm_sorted[logit_size - x - 1],3)) top_logits.append(pair) self.logits_record.append(top_logits) @@ -5337,7 +5562,8 @@ def sample(self, idx=0, logits_array=None): nl_logit = logits_array[nl_token] - if self.penalty_last_n > 0: + # note: important to skip this if use_sampling is False + if self.penalty_last_n > 0 and self.use_sampling: self._lib.llama_sample_repetition_penalties(self._ctx.ctx, ctypes.byref(token_data_array.candidates), @@ -5447,7 +5673,8 @@ def register_top_logits(self): top_logits = [] for x in range(0,self.top_logit_count): - pair = (sm_args_sorted[logit_size-x-1],sm_sorted[logit_size-x-1]) + # experiment - try rounding the float number + pair = (sm_args_sorted[logit_size-x-1],round(sm_sorted[logit_size-x-1],3)) top_logits.append(pair) self.logits_record.append(top_logits)
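With get_logits=True now the default for tools, the two new analysis methods above can inspect how confident a slim model was in its structured output. Below is a minimal sketch, not part of the patch, assuming these methods live on ModelCatalog (as the surrounding methods in models.py suggest) and that slim-sentiment-tool and its llmware/slim-sentiment tokenizer are available; the response keys ('llm_response', 'logits', 'output_tokens') are taken from the code above.

# illustrative sketch - not part of the patch
from llmware.models import ModelCatalog

catalog = ModelCatalog()

# get_logits=True is required for the logit-based helpers to have input
model = catalog.load_model("slim-sentiment-tool", sample=False, get_logits=True, temperature=0.0)

response = model.function_call("The launch was a complete disaster.")

# per-key 'value zone' logit candidates, plus a 'sampling_stats' summary
fx_scores = catalog.get_fx_scores(response, "llmware/slim-sentiment", top_choices=3, logit_count=1)

# generation-wide view of how often the top logit token was actually emitted
sampling_stats = catalog.analyze_sampling(response)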
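The revised logit_analysis follows the same pattern: value-zone boundaries are now found by decoding '[' and ']' directly rather than through the removed value_zone_markers, and the BOS token comes from the tokenizer itself. Continuing the sketch above; lookup_model_card is assumed to be the standard ModelCatalog accessor for a registered model card.

# continuing the sketch - 'catalog' and 'response' as defined above
model_card = catalog.lookup_model_card("slim-sentiment-tool")
analysis = catalog.logit_analysis(response, model_card, "llmware/slim-sentiment")

print(analysis["ryg_string"])     # red/yellow/green confidence-colored reconstruction of the output
print(analysis["marker_tokens"])  # probabilities for the positive/negative/neutral marker tokens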