diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
index 6e3d5b0573f..d938b6f5e9e 100644
--- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
+++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
@@ -323,7 +323,9 @@
     "    if (fp16_model_dir / \"openvino_model.xml\").exists():\n",
     "        return\n",
     "    remote_code = llm_model_configuration.get(\"remote_code\", False)\n",
-    "    export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16\".format(pt_model_id)\n",
+    "    export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16\".format(\n",
+    "        pt_model_id\n",
+    "    )\n",
     "    if remote_code:\n",
     "        export_command_base += \" --trust_remote_code\"\n",
     "    export_command = export_command_base + \" \" + str(fp16_model_dir)\n",
@@ -337,7 +339,9 @@
     "        return\n",
     "    int8_model_dir.mkdir(parents=True, exist_ok=True)\n",
     "    remote_code = llm_model_configuration.get(\"remote_code\", False)\n",
-    "    export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8\".format(pt_model_id)\n",
+    "    export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8\".format(\n",
+    "        pt_model_id\n",
+    "    )\n",
     "    if remote_code:\n",
     "        export_command_base += \" --trust_remote_code\"\n",
     "    export_command = export_command_base + \" \" + str(int8_model_dir)\n",
@@ -393,11 +397,7 @@
     "        \"group_size\": 128,\n",
     "        \"ratio\": 0.72,\n",
     "    },\n",
-    "    \"qwen-7b-chat\": {\n",
-    "        \"sym\": True,\n",
-    "        \"group_size\": 128,\n",
-    "        \"ratio\": 0.6\n",
-    "    },\n",
+    "    \"qwen-7b-chat\": {\"sym\": True, \"group_size\": 128, \"ratio\": 0.6},\n",
     "    \"red-pajama-3b-chat\": {\n",
     "        \"sym\": False,\n",
     "        \"group_size\": 128,\n",
@@ -416,8 +416,12 @@
     "    if (int4_model_dir / \"openvino_model.xml\").exists():\n",
     "        return\n",
     "    remote_code = llm_model_configuration.get(\"remote_code\", False)\n",
-    "    export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4\".format(pt_model_id)\n",
-    "    int4_compression_args = \" --group-size {} --ratio {}\".format(model_compression_params[\"group_size\"], model_compression_params[\"ratio\"])\n",
+    "    export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4\".format(\n",
+    "        pt_model_id\n",
+    "    )\n",
+    "    int4_compression_args = \" --group-size {} --ratio {}\".format(\n",
+    "        model_compression_params[\"group_size\"], model_compression_params[\"ratio\"]\n",
+    "    )\n",
     "    if model_compression_params[\"sym\"]:\n",
     "        int4_compression_args += \" --sym\"\n",
     "    export_command_base += int4_compression_args\n",
@@ -427,7 +431,6 @@
     "    display(Markdown(\"**Export command:**\"))\n",
     "    display(Markdown(f\"`{export_command}`\"))\n",
     "    ! $export_command\n",
-    "    \n",
     "\n",
     "\n",
     "if prepare_fp16_model.value:\n",
@@ -869,8 +872,7 @@
     "    model_dir = fp16_model_dir\n",
     "print(f\"Loading model from {model_dir}\")\n",
     "\n",
-    "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\",\n",
-    "             \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
+    "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
     "\n",
     "# On a GPU device a model is executed in FP16 precision. For red-pajama-3b-chat model there are known accuracy\n",
     "# issues caused by this, which we avoid by setting precision hint to \"f32\".\n",
@@ -1239,8 +1241,7 @@
     "    css=\".disclaimer {font-variant-caps: all-small-caps;}\",\n",
     ") as demo:\n",
     "    gr.Markdown(\"\"\"
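
To sanity-check the reformatted cells outside the notebook, here is a minimal standalone sketch of the INT4 command assembly as the diff leaves it. The model id, output directory, and compression parameters below are placeholders modeled on the `qwen-7b-chat` entry in the hunk above, not values the diff itself pins down:

```python
# Standalone sketch of the INT4 export-command assembly after the reformat.
# pt_model_id and int4_model_dir are hypothetical stand-ins for the notebook's values.
from pathlib import Path

pt_model_id = "Qwen/Qwen-7B-Chat"  # placeholder model id
int4_model_dir = Path("qwen-7b-chat/INT4_compressed_weights")  # placeholder output dir
model_compression_params = {"sym": True, "group_size": 128, "ratio": 0.6}

export_command_base = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4".format(
    pt_model_id
)
int4_compression_args = " --group-size {} --ratio {}".format(
    model_compression_params["group_size"], model_compression_params["ratio"]
)
if model_compression_params["sym"]:
    int4_compression_args += " --sym"
export_command = export_command_base + int4_compression_args + " " + str(int4_model_dir)

# Prints the exact CLI invocation the notebook would run via "! $export_command".
print(export_command)
```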
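
The `ov_config` dictionary collapsed onto one line in the `@@ -869,8 +872,7 @@` hunk is ultimately handed to the OpenVINO runtime when the exported model is loaded. A hedged sketch of that consumption path, assuming the `optimum.intel` loader the notebook relies on and a placeholder model directory:

```python
from optimum.intel.openvino import OVModelForCausalLM

# Latency-oriented runtime hints from the diff: a single inference stream and a
# disabled model cache.
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

# Per the comment in the hunk, red-pajama-3b-chat has known accuracy issues in
# FP16 on GPU; the notebook works around this with an f32 precision hint,
# roughly (assumed key name):
# ov_config["INFERENCE_PRECISION_HINT"] = "f32"

model = OVModelForCausalLM.from_pretrained(
    "qwen-7b-chat/INT4_compressed_weights",  # placeholder: whichever export dir was selected
    device="CPU",  # placeholder device string
    ov_config=ov_config,
)
```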