diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb index 6e3d5b0573f..d938b6f5e9e 100644 --- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb +++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb @@ -323,7 +323,9 @@ " if (fp16_model_dir / \"openvino_model.xml\").exists():\n", " return\n", " remote_code = llm_model_configuration.get(\"remote_code\", False)\n", - " export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16\".format(pt_model_id)\n", + " export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16\".format(\n", + " pt_model_id\n", + " )\n", " if remote_code:\n", " export_command_base += \" --trust_remote_code\"\n", " export_command = export_command_base + \" \" + str(fp16_model_dir)\n", @@ -337,7 +339,9 @@ " return\n", " int8_model_dir.mkdir(parents=True, exist_ok=True)\n", " remote_code = llm_model_configuration.get(\"remote_code\", False)\n", - " export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8\".format(pt_model_id)\n", + " export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8\".format(\n", + " pt_model_id\n", + " )\n", " if remote_code:\n", " export_command_base += \" --trust_remote_code\"\n", " export_command = export_command_base + \" \" + str(int8_model_dir)\n", @@ -393,11 +397,7 @@ " \"group_size\": 128,\n", " \"ratio\": 0.72,\n", " },\n", - " \"qwen-7b-chat\": {\n", - " \"sym\": True,\n", - " \"group_size\": 128,\n", - " \"ratio\": 0.6\n", - " },\n", + " \"qwen-7b-chat\": {\"sym\": True, \"group_size\": 128, \"ratio\": 0.6},\n", " \"red-pajama-3b-chat\": {\n", " \"sym\": False,\n", " \"group_size\": 128,\n", @@ -416,8 +416,12 @@ " if (int4_model_dir / \"openvino_model.xml\").exists():\n", " return\n", " remote_code = llm_model_configuration.get(\"remote_code\", False)\n", - " export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4\".format(pt_model_id)\n", - " int4_compression_args = \" --group-size {} --ratio {}\".format(model_compression_params[\"group_size\"], model_compression_params[\"ratio\"])\n", + " export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4\".format(\n", + " pt_model_id\n", + " )\n", + " int4_compression_args = \" --group-size {} --ratio {}\".format(\n", + " model_compression_params[\"group_size\"], model_compression_params[\"ratio\"]\n", + " )\n", " if model_compression_params[\"sym\"]:\n", " int4_compression_args += \" --sym\"\n", " export_command_base += int4_compression_args\n", @@ -427,7 +431,6 @@ " display(Markdown(\"**Export command:**\"))\n", " display(Markdown(f\"`{export_command}`\"))\n", " ! $export_command\n", - " \n", "\n", "\n", "if prepare_fp16_model.value:\n", @@ -869,8 +872,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", "\n", "# On a GPU device a model is executed in FP16 precision. 
For red-pajama-3b-chat model there are known accuracy\n", "# issues caused by this, which we avoid by setting the precision hint to \"f32\".\n", @@ -1239,8 +1241,7 @@ " css=\".disclaimer {font-variant-caps: all-small-caps;}\",\n", ") as demo:\n", " gr.Markdown(\"\"\"<h1><center>QA over Document</center></h1>\"\"\")\n", - " gr.Markdown(\n", - " f\"\"\"<center>Powered by OpenVINO and {llm_model_id.value}</center>\"\"\")\n", + " gr.Markdown(f\"\"\"<center>Powered by OpenVINO and {llm_model_id.value}</center>\"\"\")\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " docs = gr.File(\n", @@ -1262,8 +1263,7 @@ " ],\n", " )\n", " load_docs = gr.Button(\"Step 2: Build Vector Store\")\n", - " db_argument = gr.Accordion(\n", - " \"Vector Store Configuration\", open=False)\n", + " db_argument = gr.Accordion(\"Vector Store Configuration\", open=False)\n", " with db_argument:\n", " spliter = gr.Dropdown(\n", " [\"Character\", \"RecursiveCharacter\", \"Markdown\", \"Chinese\"],\n", @@ -1366,8 +1366,7 @@ " submit = gr.Button(\"Submit\")\n", " stop = gr.Button(\"Stop\")\n", " clear = gr.Button(\"Clear\")\n", - " retriever_argument = gr.Accordion(\n", - " \"Retriever Configuration\", open=True)\n", + " retriever_argument = gr.Accordion(\"Retriever Configuration\", open=True)\n", " with retriever_argument:\n", " with gr.Row():\n", " with gr.Row():\n", diff --git a/utils/llm_config.py b/utils/llm_config.py index 9fd7f25a1bf..f0c69a577dc 100644 --- a/utils/llm_config.py +++ b/utils/llm_config.py @@ -76,7 +76,8 @@ def internlm_partial_text_processor(partial_text, new_text): "start_message": DEFAULT_SYSTEM_PROMPT + ", ", "history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", "current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", - "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""+"""<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""" + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""", }, "red-pajama-3b-chat": { "model_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", @@ -98,7 +99,8 @@ def internlm_partial_text_processor(partial_text, new_text): "start_message": DEFAULT_SYSTEM_PROMPT + ", ", "history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", "current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", - "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""+"""<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""" + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""", }, "llama-2-chat-7b": { "model_id": "meta-llama/Llama-2-7b-chat-hf", @@ -285,7 +287,7 @@ def internlm_partial_text_processor(partial_text, new_text): "tokenizer_kwargs": {"add_special_tokens": False}, "partial_text_processor": youri_partial_text_processor, }, - } + }, } SUPPORTED_EMBEDDING_MODELS = { @@ -299,7 +301,8 @@ def internlm_partial_text_processor(partial_text, new_text): "model_id": "BAAI/bge-large-en-v1.5", "mean_pooling": False, "normalize_embeddings": True, - }, }, + }, + }, "Chinese": { "bge-small-zh-v1.5": { "model_id": "BAAI/bge-small-zh-v1.5", "mean_pooling": False, "normalize_embeddings": True, @@ -310,11 +313,12 @@ def internlm_partial_text_processor(partial_text, new_text): "model_id": "BAAI/bge-large-zh-v1.5", "mean_pooling": False, "normalize_embeddings": True, - }, } + }, + }, } SUPPORTED_RERANK_MODELS = { "bge-reranker-large": {"model_id": "BAAI/bge-reranker-large"}, - "bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"} + "bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"}, }