From 4b0264059cabfcf10a5350c77851d456919128f8 Mon Sep 17 00:00:00 2001
From: ethan <ethan.yang@intel.com>
Date: Mon, 22 Jan 2024 08:24:02 -0800
Subject: [PATCH] update the text encoder converter

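Update the text encoder converter in the 279-photo-maker notebook:

- set `text_encoder.config.return_dict = False` before converting the
  text encoder
- pass `VAE_DECODER_OV_PATH` to `convert` directly instead of wrapping it
  in `str()`
- remove a stray double space in the `%pip install` command
- re-run the notebook to refresh execution counts and cell outputs

Also set the PhotoMaker preview image width in README.md to 225 (was 300)
so it matches the neighbouring table rows.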
---
 README.md                                     |   2 +-
 .../279-photo-maker/279-photo-maker.ipynb     | 110 ++++++++++++------
 2 files changed, 75 insertions(+), 37 deletions(-)
diff --git a/README.md b/README.md
index eaac01b38a8..228fbf75528 100644
--- a/README.md
+++ b/README.md
@@ -228,7 +228,7 @@ Demos that demonstrate inference on a particular model.
 | [275-llm-question-answering](notebooks/275-llm-question-answering)<br> | LLM Instruction following pipeline | <img src=https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/daafd702-5a42-4f54-ae72-2e4480d73501 width=225> | 
 | [276-stable-diffusion-torchdynamo-backend](notebooks/276-stable-diffusion-torchdynamo-backend/)<br> | Image generation with Stable Diffusion and OpenVINO™ `torch.compile` feature | <img src=https://user-images.githubusercontent.com/32199725/270969617-ee88eb42-3b0d-42f4-ac71-992c40451849.png width=225> |
 | [277-amused-lightweight-text-to-image](notebooks/277-amused-lightweight-text-to-image)<br>[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/277-amused-lightweight-text-to-image/277-amused-lightweight-text-to-image.ipynb)<br>| Lightweight image generation with aMUSEd and OpenVINO™ | <img src=https://huggingface.co/amused/amused-256/resolve/main/assets/collage_small.png width=225> | 
-| [279-photo-maker](notebooks/279-photo-maker)<br> | Text-to-image generation using PhotoMaker and OpenVINO | <img src=https://github.com/openvinotoolkit/openvino_notebooks/assets/91237924/88bccc4a-5789-42ca-8a68-f402c3e7c5a4 width=300> | 
+| [279-photo-maker](notebooks/279-photo-maker)<br> | Text-to-image generation using PhotoMaker and OpenVINO | <img src=https://github.com/openvinotoolkit/openvino_notebooks/assets/91237924/88bccc4a-5789-42ca-8a68-f402c3e7c5a4 width=225> | 
 
 
 <div id='-model-training'></div>
diff --git a/notebooks/279-photo-maker/279-photo-maker.ipynb b/notebooks/279-photo-maker/279-photo-maker.ipynb
index bbdef0e7518..a846680785d 100644
--- a/notebooks/279-photo-maker/279-photo-maker.ipynb
+++ b/notebooks/279-photo-maker/279-photo-maker.ipynb
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "3f706db4-b4e5-4d2b-94ae-e4ae2f6008a5",
    "metadata": {},
    "outputs": [],
@@ -80,7 +80,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "id": "a25fb368",
    "metadata": {},
    "outputs": [
@@ -101,7 +101,7 @@
    "source": [
     "%pip uninstall -q -y openvino-dev openvino openvino-nightly\n",
     "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu\\\n",
-    "transformers diffusers  gradio openvino-nightly \"git+https://github.com/huggingface/optimum-intel.git\""
+    "transformers diffusers gradio openvino-nightly \"git+https://github.com/huggingface/optimum-intel.git\""
    ]
   },
   {
@@ -114,7 +114,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "id": "465e7bd8-eacd-45cd-9a8b-d6019ba6e0f6",
    "metadata": {},
    "outputs": [],
@@ -142,7 +142,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "id": "7328db41-0de6-4f26-b9f6-70c9df1a1be6",
    "metadata": {},
    "outputs": [
@@ -150,15 +150,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-01-22 05:58:54.312349: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2024-01-22 05:58:54.448593: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2024-01-22 05:58:55.079077: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
-      "2024-01-22 05:58:55.079133: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
-      "2024-01-22 05:58:55.081870: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
-      "2024-01-22 05:58:55.401649: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2024-01-22 05:58:55.402444: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2024-01-22 19:57:32.199838: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+      "2024-01-22 19:57:32.202465: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2024-01-22 19:57:32.233487: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+      "2024-01-22 19:57:32.233508: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+      "2024-01-22 19:57:32.233534: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+      "2024-01-22 19:57:32.239613: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2024-01-22 19:57:32.240689: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2024-01-22 05:58:56.401913: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
+      "2024-01-22 19:57:32.915326: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
      ]
     }
    ],
@@ -204,14 +204,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 7,
    "id": "d737c5da-6f7f-455f-b2ec-e89d379ba8bf",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "141a679ec73d4063af80b0828eb220fe",
+       "model_id": "a9ced8bd000746b48bcd9e73916a724a",
        "version_major": 2,
        "version_minor": 0
       },
@@ -229,6 +229,14 @@
       "Loading PhotoMaker components [1] id_encoder from [/home/ethan/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/3602d02ba7cc99ce8886e24063ed10e4f2510c84]...\n",
       "Loading PhotoMaker components [2] lora_weights from [/home/ethan/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/3602d02ba7cc99ce8886e24063ed10e4f2510c84]\n"
      ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/diffusers/loaders/lora.py:1077: FutureWarning: `fuse_text_encoder_lora` is deprecated and will be removed in version 0.27. You are using an old version of LoRA backend. This will be deprecated in the next releases in favor of PEFT make sure to install the latest PEFT and transformers packages in the future.\n",
+      "  deprecate(\"fuse_text_encoder_lora\", \"0.27\", LORA_DEPRECATION_MESSAGE)\n"
+     ]
     }
    ],
    "source": [
@@ -265,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 8,
    "id": "950b84d0-6476-4bb5-a118-89b7f0ec652d",
    "metadata": {},
    "outputs": [],
@@ -344,7 +352,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
    "id": "9d02e1c4-00fc-403b-ac3a-9052a0a66852",
    "metadata": {},
    "outputs": [
@@ -391,12 +399,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 10,
    "id": "f38e2043-6de2-49e3-8031-0cf260e4df55",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if input_shape[-1] > 1 or self.sliding_window is not None:\n",
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/transformers/modeling_attn_mask_utils.py:161: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if past_key_values_length > 0:\n",
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/transformers/models/clip/modeling_clip.py:281: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len):\n"
+     ]
+    }
+   ],
    "source": [
     "text_encoder.config.output_hidden_states = True\n",
+    "text_encoder.config.return_dict = False\n",
     "\n",
     "inputs = {\n",
     "    \"input_ids\": torch.ones((1, 77), dtype=torch.long)\n",
@@ -420,10 +442,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 11,
    "id": "6cfcdc45-773d-404d-a5da-db4a99f80254",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/diffusers/models/unet_2d_condition.py:915: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if dim % default_overall_up_factor != 0:\n",
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/diffusers/models/downsampling.py:135: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  assert hidden_states.shape[1] == self.channels\n",
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/diffusers/models/downsampling.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  assert hidden_states.shape[1] == self.channels\n",
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/diffusers/models/upsampling.py:149: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  assert hidden_states.shape[1] == self.channels\n",
+      "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/diffusers/models/upsampling.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if hidden_states.shape[0] >= 64:\n"
+     ]
+    }
+   ],
    "source": [
     "class UnetWrapper(torch.nn.Module):\n",
     "    def __init__(self, unet):\n",
@@ -475,7 +514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
+   "execution_count": 12,
    "id": "f00ad717-897e-4b2a-ba59-0afa88573569",
    "metadata": {},
    "outputs": [],
@@ -492,8 +531,7 @@
     "w_vae_decoder = VAEDecoderWrapper(vae_decoder)\n",
     "inputs = torch.zeros((1, 4, 128, 128))\n",
     "\n",
-    "\n",
-    "convert(w_vae_decoder, str(VAE_DECODER_OV_PATH), inputs, input_info=[1, 4, 128, 128])"
+    "convert(w_vae_decoder, VAE_DECODER_OV_PATH, inputs, input_info=[1, 4, 128, 128])"
    ]
   },
   {
@@ -514,14 +552,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": 13,
    "id": "dffa0201-5d5a-4710-9cdd-30c116f558b0",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0defdc75abdf4bbfa390dd4c8a8ad561",
+       "model_id": "d261304502354fc3a1cae26bfac75c93",
        "version_major": 2,
        "version_minor": 0
       },
@@ -529,7 +567,7 @@
        "Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU')"
       ]
      },
-     "execution_count": 78,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -563,7 +601,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
+   "execution_count": 14,
    "id": "a713b243-37be-45a0-81e1-de97531fbc02",
    "metadata": {},
    "outputs": [],
@@ -576,7 +614,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 15,
    "id": "e682f1c2-7831-474c-838f-bdbf8fcbb7ec",
    "metadata": {},
    "outputs": [],
@@ -609,7 +647,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 16,
    "id": "60dcd0d6-00dc-400c-b4d5-fb5cc7e49112",
    "metadata": {},
    "outputs": [],
@@ -640,7 +678,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 17,
    "id": "08b255de-1a10-4ff9-b6cf-63ab943269a9",
    "metadata": {},
    "outputs": [],
@@ -670,7 +708,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 83,
+   "execution_count": 18,
    "id": "c09b9401-2992-481a-b8e2-45e23f43bb04",
    "metadata": {},
    "outputs": [],
@@ -691,7 +729,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
+   "execution_count": 19,
    "id": "4954fb06-25be-45cc-b6fa-c3ae92c045ac",
    "metadata": {},
    "outputs": [],
@@ -714,14 +752,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 20,
    "id": "1b4f0f86-44be-43c1-a260-a102f5407b65",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "253e1c3baa0f4b22b68dbb47a9be7d71",
+       "model_id": "8b6043435ba74faa9bd23e8f49f8e26d",
        "version_major": 2,
        "version_minor": 0
       },
@@ -764,7 +802,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 21,
    "id": "c692c794-9d61-4463-957d-c35386e04f01",
    "metadata": {},
    "outputs": [