Skip to content

Commit

Permalink
rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
eaidova committed Apr 12, 2024
1 parent 3ace5fe commit 7e5373f
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 24 deletions.
35 changes: 17 additions & 18 deletions notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,9 @@
" if (fp16_model_dir / \"openvino_model.xml\").exists():\n",
" return\n",
" remote_code = llm_model_configuration.get(\"remote_code\", False)\n",
" export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16\".format(pt_model_id)\n",
" export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16\".format(\n",
" pt_model_id\n",
" )\n",
" if remote_code:\n",
" export_command_base += \" --trust_remote_code\"\n",
" export_command = export_command_base + \" \" + str(fp16_model_dir)\n",
Expand All @@ -337,7 +339,9 @@
" return\n",
" int8_model_dir.mkdir(parents=True, exist_ok=True)\n",
" remote_code = llm_model_configuration.get(\"remote_code\", False)\n",
" export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8\".format(pt_model_id)\n",
" export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8\".format(\n",
" pt_model_id\n",
" )\n",
" if remote_code:\n",
" export_command_base += \" --trust_remote_code\"\n",
" export_command = export_command_base + \" \" + str(int8_model_dir)\n",
Expand Down Expand Up @@ -393,11 +397,7 @@
" \"group_size\": 128,\n",
" \"ratio\": 0.72,\n",
" },\n",
" \"qwen-7b-chat\": {\n",
" \"sym\": True,\n",
" \"group_size\": 128,\n",
" \"ratio\": 0.6\n",
" },\n",
" \"qwen-7b-chat\": {\"sym\": True, \"group_size\": 128, \"ratio\": 0.6},\n",
" \"red-pajama-3b-chat\": {\n",
" \"sym\": False,\n",
" \"group_size\": 128,\n",
Expand All @@ -416,8 +416,12 @@
" if (int4_model_dir / \"openvino_model.xml\").exists():\n",
" return\n",
" remote_code = llm_model_configuration.get(\"remote_code\", False)\n",
" export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4\".format(pt_model_id)\n",
" int4_compression_args = \" --group-size {} --ratio {}\".format(model_compression_params[\"group_size\"], model_compression_params[\"ratio\"])\n",
" export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4\".format(\n",
" pt_model_id\n",
" )\n",
" int4_compression_args = \" --group-size {} --ratio {}\".format(\n",
" model_compression_params[\"group_size\"], model_compression_params[\"ratio\"]\n",
" )\n",
" if model_compression_params[\"sym\"]:\n",
" int4_compression_args += \" --sym\"\n",
" export_command_base += int4_compression_args\n",
Expand All @@ -427,7 +431,6 @@
" display(Markdown(\"**Export command:**\"))\n",
" display(Markdown(f\"`{export_command}`\"))\n",
" ! $export_command\n",
" \n",
"\n",
"\n",
"if prepare_fp16_model.value:\n",
Expand Down Expand Up @@ -869,8 +872,7 @@
" model_dir = fp16_model_dir\n",
"print(f\"Loading model from {model_dir}\")\n",
"\n",
"ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\",\n",
" \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
"ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
"\n",
"# On a GPU device a model is executed in FP16 precision. For the red-pajama-3b-chat model there are known accuracy\n",
"# issues caused by this, which we avoid by setting precision hint to \"f32\".\n",
Expand Down Expand Up @@ -1239,8 +1241,7 @@
" css=\".disclaimer {font-variant-caps: all-small-caps;}\",\n",
") as demo:\n",
" gr.Markdown(\"\"\"<h1><center>QA over Document</center></h1>\"\"\")\n",
" gr.Markdown(\n",
" f\"\"\"<center>Powered by OpenVINO and {llm_model_id.value} </center>\"\"\")\n",
" gr.Markdown(f\"\"\"<center>Powered by OpenVINO and {llm_model_id.value} </center>\"\"\")\n",
" with gr.Row():\n",
" with gr.Column(scale=1):\n",
" docs = gr.File(\n",
Expand All @@ -1262,8 +1263,7 @@
" ],\n",
" )\n",
" load_docs = gr.Button(\"Step 2: Build Vector Store\")\n",
" db_argument = gr.Accordion(\n",
" \"Vector Store Configuration\", open=False)\n",
" db_argument = gr.Accordion(\"Vector Store Configuration\", open=False)\n",
" with db_argument:\n",
" spliter = gr.Dropdown(\n",
" [\"Character\", \"RecursiveCharacter\", \"Markdown\", \"Chinese\"],\n",
Expand Down Expand Up @@ -1366,8 +1366,7 @@
" submit = gr.Button(\"Submit\")\n",
" stop = gr.Button(\"Stop\")\n",
" clear = gr.Button(\"Clear\")\n",
" retriever_argument = gr.Accordion(\n",
" \"Retriever Configuration\", open=True)\n",
" retriever_argument = gr.Accordion(\"Retriever Configuration\", open=True)\n",
" with retriever_argument:\n",
" with gr.Row():\n",
" with gr.Row():\n",
Expand Down
16 changes: 10 additions & 6 deletions utils/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def internlm_partial_text_processor(partial_text, new_text):
"start_message": DEFAULT_SYSTEM_PROMPT + ", ",
"history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>",
"current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}",
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""+"""<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model"""
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""
+ """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""",
},
"red-pajama-3b-chat": {
"model_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1",
Expand All @@ -98,7 +99,8 @@ def internlm_partial_text_processor(partial_text, new_text):
"start_message": DEFAULT_SYSTEM_PROMPT + ", ",
"history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>",
"current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}",
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""+"""<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model"""
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""
+ """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""",
},
"llama-2-chat-7b": {
"model_id": "meta-llama/Llama-2-7b-chat-hf",
Expand Down Expand Up @@ -285,7 +287,7 @@ def internlm_partial_text_processor(partial_text, new_text):
"tokenizer_kwargs": {"add_special_tokens": False},
"partial_text_processor": youri_partial_text_processor,
},
}
},
}

SUPPORTED_EMBEDDING_MODELS = {
Expand All @@ -299,7 +301,8 @@ def internlm_partial_text_processor(partial_text, new_text):
"model_id": "BAAI/bge-large-en-v1.5",
"mean_pooling": False,
"normalize_embeddings": True,
}, },
},
},
"Chinese": {
"bge-small-zh-v1.5": {
"model_id": "BAAI/bge-small-zh-v1.5",
Expand All @@ -310,11 +313,12 @@ def internlm_partial_text_processor(partial_text, new_text):
"model_id": "bge-large-zh-v1.5",
"mean_pooling": False,
"normalize_embeddings": True,
}, }
},
},
}


SUPPORTED_RERANK_MODELS = {
"bge-reranker-large": {"model_id": "BAAI/bge-reranker-large"},
"bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"}
"bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"},
}

0 comments on commit 7e5373f

Please sign in to comment.