From 80936bfe5c80dfe466776c2ec0ec617ad708cfee Mon Sep 17 00:00:00 2001 From: gsa9989 Date: Thu, 18 Jul 2024 17:03:09 -0700 Subject: [PATCH 01/44] Added Cosmos DB NoSQL Semantic Cache Integration with tests and jupyter notebook --- docs/docs/integrations/llm_caching.ipynb | 888 ++++-------------- libs/community/langchain_community/cache.py | 295 ++++-- .../cache/test_azure_cosmosdbnosql_cache.py | 265 ++++++ 3 files changed, 651 insertions(+), 797 deletions(-) create mode 100644 libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index dba454c231c0d..0fa172959fb8a 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -12,12 +12,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "10ad9224", "metadata": { "ExecuteTime": { - "end_time": "2024-04-12T02:05:57.319706Z", - "start_time": "2024-04-12T02:05:57.303868Z" + "end_time": "2024-07-18T21:25:25.957506Z", + "start_time": "2024-07-18T21:25:25.202642Z" } }, "outputs": [], @@ -26,7 +26,7 @@ "from langchain_openai import OpenAI\n", "\n", "# To make the caching really obvious, lets use a slower model.\n", - "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)" + "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2,openai_api_key='OPEN-AI-KEY')" ] }, { @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "426ff912", "metadata": {}, "outputs": [], @@ -53,29 +53,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "64005d1f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 52.2 ms, sys: 15.2 ms, total: 67.4 ms\n", - "Wall time: 1.19 s\n" - ] - }, - { - "data": { - "text/plain": [ - "\"\\n\\nWhy couldn't the bicycle stand up by itself? 
Because it was...two tired!\"" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -84,29 +65,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "c8a1cb2b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 191 µs, sys: 11 µs, total: 202 µs\n", - "Wall time: 205 µs\n" - ] - }, - { - "data": { - "text/plain": [ - "\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\"" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time it is, so it goes faster\n", @@ -135,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "5f036236", "metadata": {}, "outputs": [], @@ -148,29 +110,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "fa18e3af", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 33.2 ms, sys: 18.1 ms, total: 51.2 ms\n", - "Wall time: 667 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -179,31 +122,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "5bf2f6fd", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 4.86 ms, sys: 1.97 ms, total: 6.83 ms\n", - "Wall time: 5.79 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken 
cross the road?\\n\\nTo get to the other side.'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time it is, so it goes faster\n", @@ -229,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "f3920f25", "metadata": {}, "outputs": [], @@ -246,29 +170,10 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "3bf7d959", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 7.56 ms, sys: 2.98 ms, total: 10.5 ms\n", - "Wall time: 1.14 s\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -277,29 +182,10 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "id": "00fc3a34", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 2.78 ms, sys: 1.95 ms, total: 4.73 ms\n", - "Wall time: 82.9 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time it is, so it goes faster\n", @@ -328,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "39f6eb0b", "metadata": {}, "outputs": [], @@ -346,26 +232,7 @@ "execution_count": null, "id": "28920749", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 6.88 ms, sys: 8.75 ms, total: 15.6 ms\n", - "Wall time: 1.04 s\n" 
- ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -374,29 +241,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "94bf9415", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1.59 ms, sys: 610 µs, total: 2.2 ms\n", - "Wall time: 5.58 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time it is, so it goes faster\n", @@ -414,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "64df3099", "metadata": {}, "outputs": [], @@ -429,29 +277,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "8e91d3ac", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 351 ms, sys: 156 ms, total: 507 ms\n", - "Wall time: 3.37 s\n" - ] - }, - { - "data": { - "text/plain": [ - "\"\\n\\nWhy don't scientists trust atoms?\\nBecause they make up everything.\"" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -460,29 +289,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "df856948", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 6.25 ms, sys: 2.72 ms, total: 8.97 ms\n", - "Wall time: 262 
ms\n" - ] - }, - { - "data": { - "text/plain": [ - "\"\\n\\nWhy don't scientists trust atoms?\\nBecause they make up everything.\"" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time, while not a direct hit, the question is semantically similar to the original question,\n", @@ -507,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "14a82124", "metadata": {}, "outputs": [], @@ -540,26 +350,7 @@ "execution_count": null, "id": "9e4ecfd1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 21.5 ms, sys: 21.3 ms, total: 42.8 ms\n", - "Wall time: 6.2 s\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -568,29 +359,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "c98bbe3b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 571 µs, sys: 43 µs, total: 614 µs\n", - "Wall time: 635 µs\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time it is, so it goes faster\n", @@ -607,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "b3c663bb", "metadata": {}, "outputs": [], @@ -633,29 +405,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "8c273ced", "metadata": {}, - "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1.42 s, sys: 279 ms, total: 1.7 s\n", - "Wall time: 8.44 s\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -664,29 +417,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "93e21a5f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 866 ms, sys: 20 ms, total: 886 ms\n", - "Wall time: 226 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# This is an exact match, so it finds it in the cache\n", @@ -695,29 +429,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "c4bb024b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 853 ms, sys: 14.8 ms, total: 868 ms\n", - "Wall time: 224 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# This is not an exact match, but semantically within distance so it hits!\n", @@ -835,7 +550,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "2005f03a", "metadata": {}, "outputs": [], @@ -851,29 +566,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "c6a6c238", "metadata": {}, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 40.7 ms, sys: 16.5 ms, total: 57.2 ms\n", - "Wall time: 1.73 s\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -882,29 +578,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "b8f78f9d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 3.16 ms, sys: 2.98 ms, total: 6.14 ms\n", - "Wall time: 57.9 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time it is, so it goes faster\n", @@ -1044,7 +721,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "e4b898a5-fe0e-4f11-a87b-7979652322a7", "metadata": {}, "outputs": [], @@ -1065,18 +742,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "992267dc-0d19-45e0-9a13-ccbb6348d804", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CASSANDRA_KEYSPACE = demo_keyspace\n" - ] - } - ], + "outputs": [], "source": [ "import cassio\n", "\n", @@ -1101,20 +770,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "ead97077-cc79-4f5c-940c-91eb21650466", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ASTRA_DB_ID = 01234567-89ab-cdef-0123-456789abcdef\n", - "ASTRA_DB_APPLICATION_TOKEN = ········\n", - "ASTRA_DB_KEYSPACE (optional, can 
be left empty) = my_keyspace\n" - ] - } - ], + "outputs": [], "source": [ "import getpass\n", "\n", @@ -1130,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "cc53ce1b", "metadata": {}, "outputs": [], @@ -1156,7 +815,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "00a5e66f", "metadata": {}, "outputs": [], @@ -1169,22 +828,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "956a5145", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "The Moon is tidally locked with the Earth, which means that its rotation on its own axis is synchronized with its orbit around the Earth. This results in the Moon always showing the same side to the Earth. This is because the gravitational forces between the Earth and the Moon have caused the Moon's rotation to slow down over time, until it reached a point where it takes the same amount of time for the Moon to rotate on its axis as it does to orbit around the Earth. This phenomenon is common among satellites in close orbits around their parent planets and is known as tidal locking.\n", - "CPU times: user 92.5 ms, sys: 8.89 ms, total: 101 ms\n", - "Wall time: 1.98 s\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1193,22 +840,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "158f0151", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "The Moon is tidally locked with the Earth, which means that its rotation on its own axis is synchronized with its orbit around the Earth. This results in the Moon always showing the same side to the Earth. 
This is because the gravitational forces between the Earth and the Moon have caused the Moon's rotation to slow down over time, until it reached a point where it takes the same amount of time for the Moon to rotate on its axis as it does to orbit around the Earth. This phenomenon is common among satellites in close orbits around their parent planets and is known as tidal locking.\n", - "CPU times: user 5.51 ms, sys: 0 ns, total: 5.51 ms\n", - "Wall time: 5.78 ms\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1227,7 +862,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "b9ad3f54", "metadata": {}, "outputs": [], @@ -1239,7 +874,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "4623f95e", "metadata": {}, "outputs": [], @@ -1257,22 +892,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "1a8e577b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "The Moon is always showing the same side because of a phenomenon called synchronous rotation. This means that the Moon rotates on its axis at the same rate that it orbits around the Earth, which takes approximately 27.3 days. This results in the same side of the Moon always facing the Earth. This is due to the gravitational forces between the Earth and the Moon, which have caused the Moon's rotation to gradually slow down and become synchronized with its orbit. 
This is a common occurrence among many moons in our solar system.\n", - "CPU times: user 49.5 ms, sys: 7.38 ms, total: 56.9 ms\n", - "Wall time: 2.55 s\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1281,22 +904,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "f7abddfd", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "The Moon is always showing the same side because of a phenomenon called synchronous rotation. This means that the Moon rotates on its axis at the same rate that it orbits around the Earth, which takes approximately 27.3 days. This results in the same side of the Moon always facing the Earth. This is due to the gravitational forces between the Earth and the Moon, which have caused the Moon's rotation to gradually slow down and become synchronized with its orbit. This is a common occurrence among many moons in our solar system.\n", - "CPU times: user 21.2 ms, sys: 3.38 ms, total: 24.6 ms\n", - "Wall time: 532 ms\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1336,19 +947,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "feb510b6-99a3-4228-8e11-563051f8178e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n", - "ASTRA_DB_APPLICATION_TOKEN = ········\n" - ] - } - ], + "outputs": [], "source": [ "import getpass\n", "\n", @@ -1368,7 +970,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "ad63c146-ee41-4896-90ee-29fcc39f0ed5", "metadata": {}, "outputs": [], @@ -1386,22 +988,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "83e0fb02-e8eb-4483-9eb1-55b5e14c4487", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "\n", - "\n", - "There is no definitive answer to this question as it depends on the interpretation of the terms \"true fakery\" and \"fake truth\". However, one possible interpretation is that a true fakery is a counterfeit or imitation that is intended to deceive, whereas a fake truth is a false statement that is presented as if it were true.\n", - "CPU times: user 70.8 ms, sys: 4.13 ms, total: 74.9 ms\n", - "Wall time: 2.06 s\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1410,22 +1000,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "4d20d498-fe28-4e26-8531-2b31c52ee687", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "There is no definitive answer to this question as it depends on the interpretation of the terms \"true fakery\" and \"fake truth\". However, one possible interpretation is that a true fakery is a counterfeit or imitation that is intended to deceive, whereas a fake truth is a false statement that is presented as if it were true.\n", - "CPU times: user 15.1 ms, sys: 3.7 ms, total: 18.8 ms\n", - "Wall time: 531 ms\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1444,7 +1022,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "dc329c55-1cc4-4b74-94f9-61f8990fb214", "metadata": {}, "outputs": [], @@ -1456,7 +1034,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "83952a90-ab14-4e59-87c0-d2bdc1d43e43", "metadata": {}, "outputs": [], @@ -1475,22 +1053,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "d74b249a-94d5-42d0-af74-f7565a994dea", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "There is no definitive answer to this question since it presupposes a great deal about the nature of truth itself, which is a matter of 
considerable philosophical debate. It is possible, however, to construct scenarios in which something could be considered true despite being false, such as if someone sincerely believes something to be true even though it is not.\n", - "CPU times: user 65.6 ms, sys: 15.3 ms, total: 80.9 ms\n", - "Wall time: 2.72 s\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1499,22 +1065,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "11973d73-d2f4-46bd-b229-1c589df9b788", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "There is no definitive answer to this question since it presupposes a great deal about the nature of truth itself, which is a matter of considerable philosophical debate. It is possible, however, to construct scenarios in which something could be considered true despite being false, such as if someone sincerely believes something to be true even though it is not.\n", - "CPU times: user 29.3 ms, sys: 6.21 ms, total: 35.5 ms\n", - "Wall time: 1.03 s\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", "\n", @@ -1538,7 +1092,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "4a9d592db01b11b2", "metadata": { "ExecuteTime": { @@ -1598,7 +1152,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": null, "id": "14ca942820e8140c", "metadata": { "ExecuteTime": { @@ -1610,26 +1164,7 @@ "outputs_hidden": false } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 45.6 ms, sys: 19.7 ms, total: 65.3 ms\n", - "Wall time: 2.29 s\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy was the math book sad? 
Because it had too many problems.'" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -1638,7 +1173,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "id": "bc1570a2a77b58c8", "metadata": { "ExecuteTime": { @@ -1646,32 +1181,105 @@ "start_time": "2024-03-12T00:13:03.159428Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 9.61 ms, sys: 3.42 ms, total: 13 ms\n", - "Wall time: 474 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy was the math book sad? Because it had too many problems.'" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "9b45a23b-24f6-45e3-a507-1f964369964d", + "metadata": {}, + "source": [ + "## Azure Cosmos DB NoSql Semantic Cache\n", + "\n", + "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f73a343a-f211-40ab-b1a1-a26ac00fc0c5", + "metadata": {}, + "outputs": [], + "source": [ + "from azure.cosmos import CosmosClient, PartitionKey\n", + "from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", + "\n", + "URI = 'COSMOSDB_URI'\n", + "KEY = 'COSMOSDB_KEY'\n", + "client = CosmosClient(URI, credential=KEY)\n", + "\n", + "indexing_policy = {\n", + " \"indexingMode\": \"consistent\",\n", + " \"includedPaths\": [{\"path\": \"/*\"}],\n", + " \"excludedPaths\": [{\"path\": '/\"_etag\"/?'}],\n", + " \"vectorIndexes\": [{\"path\": \"/embedding\", \"type\": \"quantizedFlat\"}],\n", + "}\n", + "\n", + "vector_embedding_policy = {\n", + " \"vectorEmbeddings\": [\n", + " {\n", + " \"path\": \"/embedding\",\n", + " \"dataType\": \"float32\",\n", + " \"distanceFunction\": \"cosine\",\n", + " \"dimensions\": 1536,\n", + " }\n", + " ]\n", + "}\n", + "\n", + "partition_key = PartitionKey(path=\"/id\")\n", + "cosmos_container_properties_test = {\"partition_key\": partition_key}\n", + "cosmos_database_properties_test = {}\n", + "\n", + "#create llm cache\n", + "set_llm_cache(\n", + " AzureCosmosDBNoSqlSemanticCache(\n", + " cosmos_client=test_client,\n", + " embedding=FakeEmbeddings(),\n", + " vector_embedding_policy=vector_embedding_policy,\n", + " indexing_policy=indexing_policy,\n", + " cosmos_container_properties=cosmos_container_properties_test,\n", + " cosmos_database_properties=cosmos_database_properties_test,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65b7b41b-2da4-48b4-8505-bf6173f7ecf9", + "metadata": {}, + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", "llm(\"Tell me a joke\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "def60c91-c52d-465c-867f-4c19c34c7859", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# The second 
time, while not a direct hit, the question is semantically similar to the original question,\n", + "# so it uses the cached result!\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd1f63c4-1214-401f-995b-3279ae8979de", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "306ff47b", @@ -1819,7 +1427,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "6af46e2b", "metadata": {}, "outputs": [], @@ -1829,29 +1437,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "26c4fd8f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 5.8 ms, sys: 2.71 ms, total: 8.51 ms\n", - "Wall time: 745 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "llm(\"Tell me a joke\")" @@ -1859,29 +1448,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "46846b20", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 4.91 ms, sys: 2.64 ms, total: 7.55 ms\n", - "Wall time: 623 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nTwo guys stole a calendar. 
They got six months each.'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "llm(\"Tell me a joke\")" @@ -1903,7 +1473,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "9afa3f7a", "metadata": {}, "outputs": [], @@ -1914,7 +1484,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "98a78e8e", "metadata": {}, "outputs": [], @@ -1926,7 +1496,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "2bfb099b", "metadata": {}, "outputs": [], @@ -1938,7 +1508,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "f78b7f51", "metadata": {}, "outputs": [], @@ -1951,7 +1521,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "a2a30822", "metadata": {}, "outputs": [], @@ -1961,29 +1531,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "a545b743", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 452 ms, sys: 60.3 ms, total: 512 ms\n", - "Wall time: 5.09 s\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. He also talks about his vision for America, which includes investing in education and infrastructure. In response to Russian aggression in Ukraine, the United States is joining with European allies to impose sanctions and isolate Russia. American forces are being mobilized to protect NATO countries in the event that Putin decides to keep moving west. The Ukrainians are bravely fighting back, but the next few weeks will be hard for them. Putin will pay a high price for his actions in the long run. 
Americans should not be alarmed, as the United States is taking action to protect its interests and allies.'" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "chain.run(docs)" @@ -1999,29 +1550,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "39cbb282", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 11.5 ms, sys: 4.33 ms, total: 15.8 ms\n", - "Wall time: 1.04 s\n" - ] - }, - { - "data": { - "text/plain": [ - "'\\n\\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. He also talks about his vision for America, which includes investing in education and infrastructure.'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "chain.run(docs)" @@ -2054,7 +1586,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "7379fd5aa83ee500", "metadata": { "ExecuteTime": { @@ -2076,7 +1608,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "fecb26634bf27e93", "metadata": { "ExecuteTime": { @@ -2084,26 +1616,7 @@ "start_time": "2024-04-12T02:06:07.178381Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 39.4 ms, sys: 11.8 ms, total: 51.2 ms\n", - "Wall time: 1.55 s\n" - ] - }, - { - "data": { - "text/plain": [ - "\"\\n\\nWhy don't scientists trust atoms?\\n\\nBecause they make up everything.\"" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", @@ -2112,7 +1625,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, 
"id": "43b24b725ea4ba98", "metadata": { "ExecuteTime": { @@ -2120,26 +1633,7 @@ "start_time": "2024-04-12T02:06:11.957571Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 4.66 ms, sys: 1.1 ms, total: 5.76 ms\n", - "Wall time: 113 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "\"\\n\\nWhy don't scientists trust atoms?\\n\\nBecause they make up everything.\"" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "# The second time, while not a direct hit, the question is semantically similar to the original question,\n", @@ -2229,7 +1723,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 4a09ba61969bc..208de8d5d5e21 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -59,6 +59,9 @@ CosmosDBVectorSearchType, ) + +# from libs.community.langchain_community.vectorstores.azure_cosmos_db_no_sql import AzureCosmosDBNoSqlVectorSearch + try: from sqlalchemy.orm import declarative_base except ImportError: @@ -79,7 +82,7 @@ from langchain_community.utilities.astradb import ( _AstraDBCollectionEnvironment, ) -from langchain_community.vectorstores import AzureCosmosDBVectorSearch +from langchain_community.vectorstores import AzureCosmosDBVectorSearch, AzureCosmosDBNoSqlVectorSearch from langchain_community.vectorstores import ( OpenSearchVectorSearch as OpenSearchVectorStore, ) @@ -218,7 +221,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return self.lookup(prompt, llm_string) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: 
"""Update cache based on prompt and llm_string.""" self.update(prompt, llm_string, return_val) @@ -404,7 +407,7 @@ def _ensure_generation_type(return_val: RETURN_VAL_TYPE) -> None: @staticmethod def _get_generations( - results: dict[str | bytes, str | bytes], + results: dict[str | bytes, str | bytes], ) -> Optional[List[Generation]]: generations = [] if results: @@ -425,7 +428,7 @@ def _get_generations( @staticmethod def _configure_pipeline_for_update( - key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None + key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None ) -> None: pipe.hset( key, @@ -564,7 +567,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: """Update cache based on prompt and llm_string. Async version.""" self._ensure_generation_type(return_val) @@ -609,7 +612,7 @@ class RedisSemanticCache(BaseCache): } def __init__( - self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 ): """Initialize by passing in the `init` GPTCache func @@ -729,10 +732,10 @@ class GPTCache(BaseCache): """Cache that uses GPTCache as a backend.""" def __init__( - self, - init_func: Union[ - Callable[[Any, str], None], Callable[[Any], None], None - ] = None, + self, + init_func: Union[ + Callable[[Any, str], None], Callable[[Any], None], None + ] = None, ): """Initialize by passing in init function (default: `None`). 
@@ -860,7 +863,7 @@ def _ensure_cache_exists(cache_client: momento.CacheClient, cache_name: str) -> create_cache_response = cache_client.create_cache(cache_name) if isinstance(create_cache_response, CreateCache.Success) or isinstance( - create_cache_response, CreateCache.CacheAlreadyExists + create_cache_response, CreateCache.CacheAlreadyExists ): return None elif isinstance(create_cache_response, CreateCache.Error): @@ -878,12 +881,12 @@ class MomentoCache(BaseCache): """Cache that uses Momento as a backend. See https://gomomento.com/""" def __init__( - self, - cache_client: momento.CacheClient, - cache_name: str, - *, - ttl: Optional[timedelta] = None, - ensure_cache_exists: bool = True, + self, + cache_client: momento.CacheClient, + cache_name: str, + *, + ttl: Optional[timedelta] = None, + ensure_cache_exists: bool = True, ): """Instantiate a prompt cache using Momento as a backend. @@ -922,14 +925,14 @@ def __init__( @classmethod def from_client_params( - cls, - cache_name: str, - ttl: timedelta, - *, - configuration: Optional[momento.config.Configuration] = None, - api_key: Optional[str] = None, - auth_token: Optional[str] = None, # for backwards compatibility - **kwargs: Any, + cls, + cache_name: str, + ttl: timedelta, + *, + configuration: Optional[momento.config.Configuration] = None, + api_key: Optional[str] = None, + auth_token: Optional[str] = None, # for backwards compatibility + **kwargs: Any, ) -> MomentoCache: """Construct cache from CacheClient parameters.""" try: @@ -1085,13 +1088,13 @@ class CassandraCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME, - ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + table_name: str = 
CASSANDRA_CACHE_DEFAULT_TABLE_NAME, + ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1159,7 +1162,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: blob = _dumps_generations(return_val) await self.kv_cache.aput( @@ -1169,7 +1172,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. @@ -1274,17 +1277,17 @@ class CassandraSemanticCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - embedding: Optional[Embeddings] = None, - table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, - distance_metric: Optional[str] = None, - score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, - ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + embedding: Optional[Embeddings] = None, + table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, + distance_metric: Optional[str] = None, + score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, + ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, + setup_mode: 
CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1388,7 +1391,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: embedding_vector = await self._aget_embedding(text=prompt) llm_string_hash = _hash(llm_string) @@ -1421,7 +1424,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -1452,7 +1455,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -1483,7 +1486,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -1492,7 +1495,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -1543,7 +1546,7 @@ class SQLAlchemyMd5Cache(BaseCache): """Cache that uses SQAlchemy as a backend.""" def __init__( - self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache + self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache ): """Initialize by creating all tables.""" 
self.engine = engine @@ -1621,16 +1624,16 @@ def _make_id(prompt: str, llm_string: str) -> str: return f"{_hash(prompt)}#{_hash(llm_string)}" def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - pre_delete_collection: bool = False, - setup_mode: AstraSetupMode = AstraSetupMode.SYNC, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + pre_delete_collection: bool = False, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, ): """ Cache that uses Astra DB as a backend. @@ -1711,7 +1714,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1724,7 +1727,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. @@ -1738,7 +1741,7 @@ def delete_through_llm( return self.delete(prompt, llm_string=llm_string) async def adelete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `adelete` with the LLM being passed. 
@@ -1778,7 +1781,6 @@ async def aclear(self, **kwargs: Any) -> None: ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME = "langchain_astradb_semantic_cache" ASTRA_DB_SEMANTIC_CACHE_EMBEDDING_CACHE_SIZE = 16 - _unset = ["unset"] @@ -1822,19 +1824,19 @@ def decorating_function(user_function: Callable) -> Callable: ) class AstraDBSemanticCache(BaseCache): def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - setup_mode: AstraSetupMode = AstraSetupMode.SYNC, - pre_delete_collection: bool = False, - embedding: Embeddings, - metric: Optional[str] = None, - similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, + pre_delete_collection: bool = False, + embedding: Embeddings, + metric: Optional[str] = None, + similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, ): """ Cache that uses Astra DB as a vector-store backend for semantic @@ -1940,7 +1942,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1972,7 +1974,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str 
) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2002,7 +2004,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2032,7 +2034,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -2041,7 +2043,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -2085,22 +2087,22 @@ class AzureCosmosDBSemanticCache(BaseCache): DEFAULT_COLLECTION_NAME = "CosmosMongoVCoreCacheColl" def __init__( - self, - cosmosdb_connection_string: str, - database_name: str, - collection_name: str, - embedding: Embeddings, - *, - cosmosdb_client: Optional[Any] = None, - num_lists: int = 100, - similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, - kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, - dimensions: int = 1536, - m: int = 16, - ef_construction: int = 64, - ef_search: int = 40, - score_threshold: Optional[float] = None, - application_name: str = "LANGCHAIN_CACHING_PYTHON", + self, + cosmosdb_connection_string: str, + database_name: str, + collection_name: str, + embedding: Embeddings, + *, + cosmosdb_client: Optional[Any] = None, + num_lists: int = 100, + similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, + kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, + 
dimensions: int = 1536, + m: int = 16, + ef_construction: int = 64, + ef_search: int = 40, + score_threshold: Optional[float] = None, + application_name: str = "LANGCHAIN_CACHING_PYTHON", ): """ Args: @@ -2271,13 +2273,106 @@ def clear(self, **kwargs: Any) -> None: def _validate_enum_value(value: Any, enum_type: Type[Enum]) -> None: if not isinstance(value, enum_type): raise ValueError(f"Invalid enum value: {value}. Expected {enum_type}.") +class AzureCosmosDBNoSqlSemanticCache(BaseCache): + """Cache that uses Cosmos DB NoSQL backend""" + + def __init__( + self, + embedding: Embeddings, + cosmos_client: Optional[Any] = None, + database_name: str = "CosmosNoSqlCacheDB", + container_name: str = "CosmosNoSqlCacheContainer", + *, + vector_embedding_policy: Optional[Dict[str, Any]] = None, + indexing_policy: Optional[Dict[str, Any]] = None, + cosmos_container_properties: Dict[str, Any], + cosmos_database_properties: Dict[str, Any] + ): + self.cosmos_client = cosmos_client + self.database_name = database_name + self.container_name = container_name + self.embedding = embedding + self.vector_embedding_policy = vector_embedding_policy + self.indexing_policy = indexing_policy + self.cosmos_container_properties = cosmos_container_properties + self.cosmos_database_properties = cosmos_database_properties + self._cache_: Optional[AzureCosmosDBNoSqlVectorSearch] = None + + def _create_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: + + #create new vectorstore client to create the cache + if self.cosmos_client: + self._cache_ = AzureCosmosDBNoSqlVectorSearch( + cosmos_client=self.cosmos_client, + embedding=self.embedding, + vector_embedding_policy=self.vector_embedding_policy, + indexing_policy=self.indexing_policy, + cosmos_container_properties=self.cosmos_container_properties, + cosmos_database_properties=self.cosmos_database_properties, + database_name=self.database_name, + container_name=self.container_name + ) + + return self._cache_ + + def lookup(self, 
prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: + """Look up based on prompt.""" + if not self._cache_: + self._cache_ = self._create_llm_cache(llm_string) + llm_cache = self._cache_ + generations: List = [] + # Read from a Hash + results = llm_cache.similarity_search( + query=prompt, + k=1, + ) + if results: + for document in results: + try: + generations.extend(loads(document.metadata["return_val"])) + except Exception: + logger.warning( + "Retrieving a cache value that could not be deserialized " + "properly. This is likely due to the cache being in an " + "older format. Please recreate your cache to avoid this " + "error." + ) + # In a previous life we stored the raw text directly + # in the table, so assume it's in that format. + generations.extend( + _load_generations_from_json(document.metadata["return_val"]) + ) + return generations if generations else None + + def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: + """Update cache based on prompt and llm_string.""" + for gen in return_val: + if not isinstance(gen, Generation): + raise ValueError( + "CosmosDBNoSqlSemanticCache only supports caching of " + f"normal LLM generations, got {type(gen)}" + ) + if not self._cache_: + self._cache_ = self._create_llm_cache(llm_string) + llm_cache = self._cache_ + metadata = { + "llm_string": llm_string, + "prompt": prompt, + "return_val": dumps([g for g in return_val]), + } + llm_cache.add_texts(texts=[prompt], metadatas=[metadata]) + def clear(self, **kwargs: Any) -> None: + """Clear semantic cache for a given llm_string.""" + database = self.cosmos_client.get_database_client(self.database_name) + container = database.get_container_client(self.container_name) + database.delete_container(self.container_name) class OpenSearchSemanticCache(BaseCache): """Cache that uses OpenSearch vector store backend""" def __init__( - self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, opensearch_url: str, 
embedding: Embeddings, score_threshold: float = 0.2
    )
    ):
        """
        Args:
diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py
new file mode 100644
index 0000000000000..0a3b0d73c3024
--- /dev/null
+++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py
@@ -0,0 +1,265 @@
+"""Test Azure CosmosDB NoSql cache functionality.
+"""
+import os
+import uuid
+
+import pytest
+from azure.cosmos import CosmosClient, PartitionKey
+from langchain.globals import get_llm_cache, set_llm_cache
+from langchain_core.outputs import Generation
+
+from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache
+from langchain_community.vectorstores import AzureCosmosDBNoSqlVectorSearch
+
+from tests.integration_tests.cache.fake_embeddings import (
+    FakeEmbeddings,
+)
+from tests.unit_tests.llms.fake_llm import FakeLLM
+
+
+URI = 'COSMOSDB_URI'
+KEY = 'COSMOSDB_KEY'
+test_client = CosmosClient(URI, credential=KEY)
+
+indexing_policy = {
+    "indexingMode": "consistent",
+    "includedPaths": [{"path": "/*"}],
+    "excludedPaths": [{"path": '/"_etag"/?'}],
+    "vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
+}
+
+vector_embedding_policy = {
+    "vectorEmbeddings": [
+        {
+            "path": "/embedding",
+            "dataType": "float32",
+            "distanceFunction": "cosine",
+            "dimensions": 1536,
+        }
+    ]
+}
+
+partition_key = PartitionKey(path="/id")
+cosmos_container_properties_test = {"partition_key": partition_key}
+cosmos_database_properties_test = {}
+
+# @pytest.fixture(scope="session")
+def test_azure_cosmos_db_nosql_semantic_cache() -> None:
+    set_llm_cache(
+        AzureCosmosDBNoSqlSemanticCache(
+            cosmos_client=test_client,
+            embedding=FakeEmbeddings(),
+            vector_embedding_policy=vector_embedding_policy,
+            indexing_policy=indexing_policy,
+            cosmos_container_properties=cosmos_container_properties_test,
+            
cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update("foo", llm_string, [Generation(text="fizz")]) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz")] + + # clear the cache + get_llm_cache().clear(llm_string=llm_string) + + +def test_azure_cosmos_db_semantic_cache_inner_product() -> None: + set_llm_cache( + AzureCosmosDBNoSqlSemanticCache( + cosmos_client=test_client, + embedding=FakeEmbeddings(), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties_test, + cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update("foo", llm_string, [Generation(text="fizz")]) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz")] + + # clear the cache + get_llm_cache().clear(llm_string=llm_string) + + +def test_azure_cosmos_db_semantic_cache_multi() -> None: + set_llm_cache( + AzureCosmosDBNoSqlSemanticCache( + cosmos_client=test_client, + embedding=FakeEmbeddings(), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties_test, + cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update( + "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")] + 
) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")] + + # clear the cache + get_llm_cache().clear(llm_string=llm_string) + + + +def test_azure_cosmos_db_semantic_cache_multi_inner_product() -> None: + set_llm_cache( + AzureCosmosDBNoSqlSemanticCache( + cosmos_client=test_client, + embedding=FakeEmbeddings(), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties_test, + cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update( + "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")] + ) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")] + + # clear the cache + get_llm_cache().clear(llm_string=llm_string) + + + +def test_azure_cosmos_db_semantic_cache_hnsw() -> None: + set_llm_cache( + AzureCosmosDBNoSqlSemanticCache( + cosmos_client=test_client, + embedding=FakeEmbeddings(), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties_test, + cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update("foo", llm_string, [Generation(text="fizz")]) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz")] + + # clear the cache 
+ get_llm_cache().clear(llm_string=llm_string) + + + +def test_azure_cosmos_db_semantic_cache_inner_product_hnsw() -> None: + set_llm_cache( + AzureCosmosDBNoSqlSemanticCache( + cosmos_client=test_client, + embedding=FakeEmbeddings(), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties_test, + cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update("foo", llm_string, [Generation(text="fizz")]) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz")] + + # clear the cache + get_llm_cache().clear(llm_string=llm_string) + + + +def test_azure_cosmos_db_semantic_cache_multi_hnsw() -> None: + set_llm_cache( + AzureCosmosDBNoSqlSemanticCache( + cosmos_client=test_client, + embedding=FakeEmbeddings(), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties_test, + cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update( + "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")] + ) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")] + + # clear the cache + get_llm_cache().clear(llm_string=llm_string) + + + +def test_azure_cosmos_db_semantic_cache_multi_inner_product_hnsw() -> None: + set_llm_cache( + AzureCosmosDBNoSqlSemanticCache( + cosmos_client=test_client, + 
embedding=FakeEmbeddings(), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties_test, + cosmos_database_properties=cosmos_database_properties_test, + ) + ) + + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update( + "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")] + ) + + # foo and bar will have the same embedding produced by FakeEmbeddings + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")] + + # clear the cache + get_llm_cache().clear(llm_string=llm_string) From e891cd914bf0e4884f31ff89f085f2d5a8992aae Mon Sep 17 00:00:00 2001 From: gsa9989 Date: Thu, 18 Jul 2024 17:29:58 -0700 Subject: [PATCH 02/44] Removed openai_api_key parameter --- docs/docs/integrations/llm_caching.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index 6abc992ba3785..faf3b9d39b8ab 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -26,7 +26,7 @@ "from langchain_openai import OpenAI\n", "\n", "# To make the caching really obvious, lets use a slower model.\n", - "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2,openai_api_key='OPEN-AI-KEY')" + "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)" ] }, { From 676d762bffdd11c945f30ff9fcaa1000fdc9059c Mon Sep 17 00:00:00 2001 From: gsa9989 Date: Thu, 18 Jul 2024 17:40:56 -0700 Subject: [PATCH 03/44] Removed unnecessary space changes --- libs/community/langchain_community/cache.py | 197 ++++++++++---------- 1 file changed, 99 insertions(+), 98 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 
cecda82910774..70810be3e0e48 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -223,7 +223,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return self.lookup(prompt, llm_string) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: """Update cache based on prompt and llm_string.""" self.update(prompt, llm_string, return_val) @@ -409,7 +409,7 @@ def _ensure_generation_type(return_val: RETURN_VAL_TYPE) -> None: @staticmethod def _get_generations( - results: dict[str | bytes, str | bytes], + results: dict[str | bytes, str | bytes], ) -> Optional[List[Generation]]: generations = [] if results: @@ -430,7 +430,7 @@ def _get_generations( @staticmethod def _configure_pipeline_for_update( - key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None + key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None ) -> None: pipe.hset( key, @@ -569,7 +569,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: """Update cache based on prompt and llm_string. 
Async version.""" self._ensure_generation_type(return_val) @@ -614,7 +614,7 @@ class RedisSemanticCache(BaseCache): } def __init__( - self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 ): """Initialize by passing in the `init` GPTCache func @@ -734,10 +734,10 @@ class GPTCache(BaseCache): """Cache that uses GPTCache as a backend.""" def __init__( - self, - init_func: Union[ - Callable[[Any, str], None], Callable[[Any], None], None - ] = None, + self, + init_func: Union[ + Callable[[Any, str], None], Callable[[Any], None], None + ] = None, ): """Initialize by passing in init function (default: `None`). @@ -865,7 +865,7 @@ def _ensure_cache_exists(cache_client: momento.CacheClient, cache_name: str) -> create_cache_response = cache_client.create_cache(cache_name) if isinstance(create_cache_response, CreateCache.Success) or isinstance( - create_cache_response, CreateCache.CacheAlreadyExists + create_cache_response, CreateCache.CacheAlreadyExists ): return None elif isinstance(create_cache_response, CreateCache.Error): @@ -883,12 +883,12 @@ class MomentoCache(BaseCache): """Cache that uses Momento as a backend. See https://gomomento.com/""" def __init__( - self, - cache_client: momento.CacheClient, - cache_name: str, - *, - ttl: Optional[timedelta] = None, - ensure_cache_exists: bool = True, + self, + cache_client: momento.CacheClient, + cache_name: str, + *, + ttl: Optional[timedelta] = None, + ensure_cache_exists: bool = True, ): """Instantiate a prompt cache using Momento as a backend. 
@@ -927,14 +927,14 @@ def __init__( @classmethod def from_client_params( - cls, - cache_name: str, - ttl: timedelta, - *, - configuration: Optional[momento.config.Configuration] = None, - api_key: Optional[str] = None, - auth_token: Optional[str] = None, # for backwards compatibility - **kwargs: Any, + cls, + cache_name: str, + ttl: timedelta, + *, + configuration: Optional[momento.config.Configuration] = None, + api_key: Optional[str] = None, + auth_token: Optional[str] = None, # for backwards compatibility + **kwargs: Any, ) -> MomentoCache: """Construct cache from CacheClient parameters.""" try: @@ -1090,13 +1090,13 @@ class CassandraCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME, - ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME, + ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1164,7 +1164,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: blob = _dumps_generations(return_val) await self.kv_cache.aput( @@ -1174,7 +1174,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. 
@@ -1279,17 +1279,17 @@ class CassandraSemanticCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - embedding: Optional[Embeddings] = None, - table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, - distance_metric: Optional[str] = None, - score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, - ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + embedding: Optional[Embeddings] = None, + table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, + distance_metric: Optional[str] = None, + score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, + ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, + setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1393,7 +1393,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: embedding_vector = await self._aget_embedding(text=prompt) llm_string_hash = _hash(llm_string) @@ -1426,7 +1426,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. 
@@ -1457,7 +1457,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -1488,7 +1488,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -1497,7 +1497,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -1548,7 +1548,7 @@ class SQLAlchemyMd5Cache(BaseCache): """Cache that uses SQAlchemy as a backend.""" def __init__( - self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache + self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache ): """Initialize by creating all tables.""" self.engine = engine @@ -1626,16 +1626,16 @@ def _make_id(prompt: str, llm_string: str) -> str: return f"{_hash(prompt)}#{_hash(llm_string)}" def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - pre_delete_collection: bool = False, - setup_mode: AstraSetupMode = AstraSetupMode.SYNC, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: 
Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + pre_delete_collection: bool = False, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, ): """ Cache that uses Astra DB as a backend. @@ -1716,7 +1716,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1729,7 +1729,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. @@ -1743,7 +1743,7 @@ def delete_through_llm( return self.delete(prompt, llm_string=llm_string) async def adelete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `adelete` with the LLM being passed. 
@@ -1783,6 +1783,7 @@ async def aclear(self, **kwargs: Any) -> None: ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME = "langchain_astradb_semantic_cache" ASTRA_DB_SEMANTIC_CACHE_EMBEDDING_CACHE_SIZE = 16 + _unset = ["unset"] @@ -1826,19 +1827,19 @@ def decorating_function(user_function: Callable) -> Callable: ) class AstraDBSemanticCache(BaseCache): def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - setup_mode: AstraSetupMode = AstraSetupMode.SYNC, - pre_delete_collection: bool = False, - embedding: Embeddings, - metric: Optional[str] = None, - similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, + pre_delete_collection: bool = False, + embedding: Embeddings, + metric: Optional[str] = None, + similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, ): """ Cache that uses Astra DB as a vector-store backend for semantic @@ -1944,7 +1945,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1976,7 +1977,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str 
) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2006,7 +2007,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2036,7 +2037,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -2045,7 +2046,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -2089,22 +2090,22 @@ class AzureCosmosDBSemanticCache(BaseCache): DEFAULT_COLLECTION_NAME = "CosmosMongoVCoreCacheColl" def __init__( - self, - cosmosdb_connection_string: str, - database_name: str, - collection_name: str, - embedding: Embeddings, - *, - cosmosdb_client: Optional[Any] = None, - num_lists: int = 100, - similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, - kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, - dimensions: int = 1536, - m: int = 16, - ef_construction: int = 64, - ef_search: int = 40, - score_threshold: Optional[float] = None, - application_name: str = "LANGCHAIN_CACHING_PYTHON", + self, + cosmosdb_connection_string: str, + database_name: str, + collection_name: str, + embedding: Embeddings, + *, + cosmosdb_client: Optional[Any] = None, + num_lists: int = 100, + similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, + kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, + 
dimensions: int = 1536, + m: int = 16, + ef_construction: int = 64, + ef_search: int = 40, + score_threshold: Optional[float] = None, + application_name: str = "LANGCHAIN_CACHING_PYTHON", ): """ Args: @@ -2374,7 +2375,7 @@ class OpenSearchSemanticCache(BaseCache): """Cache that uses OpenSearch vector store backend""" def __init__( - self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2 ): """ Args: From 4701fb7fdc442dc5c120303a22f4f5032152575f Mon Sep 17 00:00:00 2001 From: gsa9989 Date: Thu, 8 Aug 2024 13:59:06 -0700 Subject: [PATCH 04/44] test updates --- .../cache/test_azure_cosmosdbnosql_cache.py | 135 ++++++------------ 1 file changed, 42 insertions(+), 93 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 0a3b0d73c3024..12a5b001275a1 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -17,40 +17,43 @@ from libs.community.tests.unit_tests.llms.fake_llm import FakeLLM -URI = 'COSMOSDB_URI' -KEY = 'COSMOSDB_KEY' -test_client = CosmosClient(URL, credential=KEY) - -indexing_policy = { - "indexingMode": "consistent", - "includedPaths": [{"path": "/*"}], - "excludedPaths": [{"path": '/"_etag"/?'}], - "vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}], -} - -vector_embedding_policy = { - "vectorEmbeddings": [ - { - "path": "/embedding", - "dataType": "float32", - "distanceFunction": "cosine", - "dimensions": 1536, - } - ] -} +URI = 'COSMOS_DB_URI' +KEY = 'COSMOS_DB_KEY' +test_client = CosmosClient(URI, credential=KEY) + +#cosine, euclidean, innerproduct +def indexing_policy(index_type: str): + return { + "indexingMode": "consistent", + "includedPaths": [{"path": "/*"}], + 
"excludedPaths": [{"path": '/"_etag"/?'}], + "vectorIndexes": [{"path": "/embedding", "type": index_type}], + } + +def vector_embedding_policy(distance_function: str): + return { + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": "float32", + "distanceFunction": distance_function, + "dimensions": 1536, + } + ] + } partition_key = PartitionKey(path="/id") cosmos_container_properties_test = {"partition_key": partition_key} cosmos_database_properties_test = {} # @pytest.fixture(scope="session") -def test_azure_cosmos_db_nosql_semantic_cache() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=test_client, embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, + vector_embedding_policy=vector_embedding_policy("cosine"), + indexing_policy=indexing_policy("quantizedFlat"), cosmos_container_properties=cosmos_container_properties_test, cosmos_database_properties=cosmos_database_properties_test, ) @@ -70,13 +73,13 @@ def test_azure_cosmos_db_nosql_semantic_cache() -> None: get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_semantic_cache_inner_product() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=test_client, embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, + vector_embedding_policy=vector_embedding_policy("cosine"), + indexing_policy=indexing_policy("flat"), cosmos_container_properties=cosmos_container_properties_test, cosmos_database_properties=cosmos_database_properties_test, ) @@ -96,13 +99,13 @@ def test_azure_cosmos_db_semantic_cache_inner_product() -> None: get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_semantic_cache_multi() -> None: +def 
test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=test_client, embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, + vector_embedding_policy=vector_embedding_policy("dotProduct"), + indexing_policy=indexing_policy("quantizedFlat"), cosmos_container_properties=cosmos_container_properties_test, cosmos_database_properties=cosmos_database_properties_test, ) @@ -125,13 +128,13 @@ def test_azure_cosmos_db_semantic_cache_multi() -> None: -def test_azure_cosmos_db_semantic_cache_multi_inner_product() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=test_client, embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, + vector_embedding_policy=vector_embedding_policy("dotProduct"), + indexing_policy=indexing_policy("flat"), cosmos_container_properties=cosmos_container_properties_test, cosmos_database_properties=cosmos_database_properties_test, ) @@ -154,13 +157,13 @@ def test_azure_cosmos_db_semantic_cache_multi_inner_product() -> None: -def test_azure_cosmos_db_semantic_cache_hnsw() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=test_client, embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, + vector_embedding_policy=vector_embedding_policy("euclidean"), + indexing_policy=indexing_policy("quantizedFlat"), cosmos_container_properties=cosmos_container_properties_test, cosmos_database_properties=cosmos_database_properties_test, ) @@ -181,13 +184,13 @@ def test_azure_cosmos_db_semantic_cache_hnsw() -> None: -def test_azure_cosmos_db_semantic_cache_inner_product_hnsw() -> None: +def 
test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=test_client, embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, + vector_embedding_policy=vector_embedding_policy("euclidean"), + indexing_policy=indexing_policy("flat"), cosmos_container_properties=cosmos_container_properties_test, cosmos_database_properties=cosmos_database_properties_test, ) @@ -208,58 +211,4 @@ def test_azure_cosmos_db_semantic_cache_inner_product_hnsw() -> None: -def test_azure_cosmos_db_semantic_cache_multi_hnsw() -> None: - set_llm_cache( - AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, - embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, - cosmos_container_properties=cosmos_container_properties_test, - cosmos_database_properties=cosmos_database_properties_test, - ) - ) - - llm = FakeLLM() - params = llm.dict() - params["stop"] = None - llm_string = str(sorted([(k, v) for k, v in params.items()])) - get_llm_cache().update( - "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")] - ) - - # foo and bar will have the same embedding produced by FakeEmbeddings - cache_output = get_llm_cache().lookup("bar", llm_string) - assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")] - - # clear the cache - get_llm_cache().clear(llm_string=llm_string) - - -def test_azure_cosmos_db_semantic_cache_multi_inner_product_hnsw() -> None: - set_llm_cache( - AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, - embedding=FakeEmbeddings(), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=indexing_policy, - cosmos_container_properties=cosmos_container_properties_test, - cosmos_database_properties=cosmos_database_properties_test, - ) - ) - - llm = FakeLLM() - params = llm.dict() - params["stop"] = None - llm_string = str(sorted([(k, v) for 
k, v in params.items()])) - get_llm_cache().update( - "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")] - ) - - # foo and bar will have the same embedding produced by FakeEmbeddings - cache_output = get_llm_cache().lookup("bar", llm_string) - assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")] - - # clear the cache - get_llm_cache().clear(llm_string=llm_string) From d23bcd646ce4ad38488f5ca9c91b1a71370f2c40 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 27 Aug 2024 13:04:48 -0400 Subject: [PATCH 05/44] format --- libs/community/langchain_community/cache.py | 34 +++++++++++-------- .../cache/test_azure_cosmosdbnosql_cache.py | 27 ++++++--------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 46d6f02e5ac3b..431ed294931d8 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -60,7 +60,6 @@ ) from langchain_community.vectorstores.utils import DistanceStrategy - # from libs.community.langchain_community.vectorstores.azure_cosmos_db_no_sql import AzureCosmosDBNoSqlVectorSearch try: @@ -83,7 +82,10 @@ from langchain_community.utilities.astradb import ( _AstraDBCollectionEnvironment, ) -from langchain_community.vectorstores import AzureCosmosDBVectorSearch, AzureCosmosDBNoSqlVectorSearch +from langchain_community.vectorstores import ( + AzureCosmosDBNoSqlVectorSearch, + AzureCosmosDBVectorSearch, +) from langchain_community.vectorstores import ( OpenSearchVectorSearch as OpenSearchVectorStore, ) @@ -2276,20 +2278,22 @@ def clear(self, **kwargs: Any) -> None: def _validate_enum_value(value: Any, enum_type: Type[Enum]) -> None: if not isinstance(value, enum_type): raise ValueError(f"Invalid enum value: {value}. 
Expected {enum_type}.") + + class AzureCosmosDBNoSqlSemanticCache(BaseCache): """Cache that uses Cosmos DB NoSQL backend""" def __init__( - self, - embedding: Embeddings, - cosmos_client: Optional[Any] = None, - database_name: str = "CosmosNoSqlCacheDB", - container_name: str = "CosmosNoSqlCacheContainer", - *, - vector_embedding_policy: Optional[Dict[str, Any]] = None, - indexing_policy: Optional[Dict[str, Any]] = None, - cosmos_container_properties: Dict[str, Any], - cosmos_database_properties: Dict[str, Any] + self, + embedding: Embeddings, + cosmos_client: Optional[Any] = None, + database_name: str = "CosmosNoSqlCacheDB", + container_name: str = "CosmosNoSqlCacheContainer", + *, + vector_embedding_policy: Optional[Dict[str, Any]] = None, + indexing_policy: Optional[Dict[str, Any]] = None, + cosmos_container_properties: Dict[str, Any], + cosmos_database_properties: Dict[str, Any], ): self.cosmos_client = cosmos_client self.database_name = database_name @@ -2302,8 +2306,7 @@ def __init__( self._cache_: Optional[AzureCosmosDBNoSqlVectorSearch] = None def _create_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: - - #create new vectorstore client to create the cache + # create new vectorstore client to create the cache if self.cosmos_client: self._cache_ = AzureCosmosDBNoSqlVectorSearch( cosmos_client=self.cosmos_client, @@ -2313,7 +2316,7 @@ def _create_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: cosmos_container_properties=self.cosmos_container_properties, cosmos_database_properties=self.cosmos_database_properties, database_name=self.database_name, - container_name=self.container_name + container_name=self.container_name, ) return self._cache_ @@ -2371,6 +2374,7 @@ def clear(self, **kwargs: Any) -> None: container = database.get_container_client(self.container_name) database.delete_container(self.container_name) + class OpenSearchSemanticCache(BaseCache): """Cache that uses OpenSearch vector store backend""" diff --git 
a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 12a5b001275a1..a44983c6fcea7 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -1,5 +1,5 @@ -"""Test Azure CosmosDB NoSql cache functionality. -""" +"""Test Azure CosmosDB NoSql cache functionality.""" + import os import uuid @@ -7,21 +7,20 @@ from azure.cosmos import CosmosClient, PartitionKey from langchain.globals import get_llm_cache, set_llm_cache from langchain_core.outputs import Generation - -from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache -from langchain_community.vectorstores import AzureCosmosDBNoSqlVectorSearch - from libs.community.tests.integration_tests.cache.fake_embeddings import ( FakeEmbeddings, ) from libs.community.tests.unit_tests.llms.fake_llm import FakeLLM +from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache +from langchain_community.vectorstores import AzureCosmosDBNoSqlVectorSearch -URI = 'COSMOS_DB_URI' -KEY = 'COSMOS_DB_KEY' +URI = "COSMOS_DB_URI" +KEY = "COSMOS_DB_KEY" test_client = CosmosClient(URI, credential=KEY) -#cosine, euclidean, innerproduct + +# cosine, euclidean, innerproduct def indexing_policy(index_type: str): return { "indexingMode": "consistent", @@ -30,6 +29,7 @@ def indexing_policy(index_type: str): "vectorIndexes": [{"path": "/embedding", "type": index_type}], } + def vector_embedding_policy(distance_function: str): return { "vectorEmbeddings": [ @@ -42,10 +42,12 @@ def vector_embedding_policy(distance_function: str): ] } + partition_key = PartitionKey(path="/id") cosmos_container_properties_test = {"partition_key": partition_key} cosmos_database_properties_test = {} + # @pytest.fixture(scope="session") def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: 
set_llm_cache( @@ -127,7 +129,6 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat() -> None get_llm_cache().clear(llm_string=llm_string) - def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -156,7 +157,6 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None: get_llm_cache().clear(llm_string=llm_string) - def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -183,7 +183,6 @@ def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None: get_llm_cache().clear(llm_string=llm_string) - def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -208,7 +207,3 @@ def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat() -> None: # clear the cache get_llm_cache().clear(llm_string=llm_string) - - - - From 4f58256d25e4a85702142650f4a7a290cede3cbf Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 27 Aug 2024 13:09:10 -0400 Subject: [PATCH 06/44] lint --- libs/community/langchain_community/cache.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 642c300c7b5cc..27b58518b8536 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -60,8 +60,6 @@ ) from langchain_community.vectorstores.utils import DistanceStrategy -# from libs.community.langchain_community.vectorstores.azure_cosmos_db_no_sql import AzureCosmosDBNoSqlVectorSearch - try: from sqlalchemy.orm import declarative_base except ImportError: @@ -2371,7 +2369,6 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N def clear(self, **kwargs: Any) -> None: """Clear semantic cache for a given llm_string.""" database = 
self.cosmos_client.get_database_client(self.database_name) - container = database.get_container_client(self.container_name) database.delete_container(self.container_name) From aa1e84679249700852ae791dbf93c3e9886fd643 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 27 Aug 2024 14:47:09 -0700 Subject: [PATCH 07/44] linting --- libs/community/langchain_community/cache.py | 10 +++++----- .../cache/test_azure_cosmosdbnosql_cache.py | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 27b58518b8536..324f23eafd989 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -48,6 +48,7 @@ cast, ) +from azure.cosmos import CosmosClient from sqlalchemy import Column, Integer, String, create_engine, delete, select from sqlalchemy.engine import Row from sqlalchemy.engine.base import Engine @@ -2284,12 +2285,12 @@ class AzureCosmosDBNoSqlSemanticCache(BaseCache): def __init__( self, embedding: Embeddings, - cosmos_client: Optional[Any] = None, + cosmos_client: CosmosClient, database_name: str = "CosmosNoSqlCacheDB", container_name: str = "CosmosNoSqlCacheContainer", *, - vector_embedding_policy: Optional[Dict[str, Any]] = None, - indexing_policy: Optional[Dict[str, Any]] = None, + vector_embedding_policy: Dict[str, Any] = None, + indexing_policy: Dict[str, Any] = None, cosmos_container_properties: Dict[str, Any], cosmos_database_properties: Dict[str, Any], ): @@ -2341,8 +2342,7 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: "older format. Please recreate your cache to avoid this " "error." ) - # In a previous life we stored the raw text directly - # in the table, so assume it's in that format. 
+ generations.extend( _load_generations_from_json(document.metadata["return_val"]) ) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index a44983c6fcea7..2f8dfd4a45cfd 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -48,7 +48,6 @@ def vector_embedding_policy(distance_function: str): cosmos_database_properties_test = {} -# @pytest.fixture(scope="session") def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( From cd744f09e552136733e56d82864d214e4e6c20a9 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 27 Aug 2024 14:55:37 -0700 Subject: [PATCH 08/44] linting --- libs/community/langchain_community/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 324f23eafd989..bc3eb4a9a90ee 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -2289,8 +2289,8 @@ def __init__( database_name: str = "CosmosNoSqlCacheDB", container_name: str = "CosmosNoSqlCacheContainer", *, - vector_embedding_policy: Dict[str, Any] = None, - indexing_policy: Dict[str, Any] = None, + vector_embedding_policy: Dict[str, Any], + indexing_policy: Dict[str, Any], cosmos_container_properties: Dict[str, Any], cosmos_database_properties: Dict[str, Any], ): From c625e7d1f556b9248e7c4d198a0d5da56d897851 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 27 Aug 2024 15:05:37 -0700 Subject: [PATCH 09/44] linting --- libs/community/langchain_community/cache.py | 28 +++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/libs/community/langchain_community/cache.py 
b/libs/community/langchain_community/cache.py index bc3eb4a9a90ee..65b29db238661 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -2302,12 +2302,24 @@ def __init__( self.indexing_policy = indexing_policy self.cosmos_container_properties = cosmos_container_properties self.cosmos_database_properties = cosmos_database_properties - self._cache_: Optional[AzureCosmosDBNoSqlVectorSearch] = None + self._cache_dict: Dict[str, AzureCosmosDBNoSqlVectorSearch] = {} + + def _cache_name(self, llm_string: str) -> str: + hashed_index = _hash(llm_string) + return f"cache:{hashed_index}" + + def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: + cache_name = self._cache_name(llm_string) + + # return vectorstore client for the specific llm string + if cache_name in self._cache_dict: + return self._cache_dict[cache_name] + + - def _create_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: # create new vectorstore client to create the cache if self.cosmos_client: - self._cache_ = AzureCosmosDBNoSqlVectorSearch( + self._cache_dict[cache_name] = AzureCosmosDBNoSqlVectorSearch( cosmos_client=self.cosmos_client, embedding=self.embedding, vector_embedding_policy=self.vector_embedding_policy, @@ -2318,13 +2330,11 @@ def _create_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: container_name=self.container_name, ) - return self._cache_ + return self._cache_dict[cache_name] def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: """Look up based on prompt.""" - if not self._cache_: - self._cache_ = self._create_llm_cache(llm_string) - llm_cache = self._cache_ + llm_cache = self._get_llm_cache(llm_string) generations: List = [] # Read from a Hash results = llm_cache.similarity_search( @@ -2356,9 +2366,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N "CosmosDBNoSqlSemanticCache only supports caching of " f"normal LLM 
generations, got {type(gen)}" ) - if not self._cache_: - self._cache_ = self._create_llm_cache(llm_string) - llm_cache = self._cache_ + llm_cache = self._get_llm_cache(llm_string) metadata = { "llm_string": llm_string, "prompt": prompt, From 31b7c1bf7f7769d0f2f7d9986775ec2d933a1aa5 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 27 Aug 2024 15:19:40 -0700 Subject: [PATCH 10/44] linting --- libs/community/langchain_community/cache.py | 241 ++++++++++---------- 1 file changed, 119 insertions(+), 122 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 65b29db238661..349945d95cce5 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -224,7 +224,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return self.lookup(prompt, llm_string) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: """Update cache based on prompt and llm_string.""" self.update(prompt, llm_string, return_val) @@ -410,7 +410,7 @@ def _ensure_generation_type(return_val: RETURN_VAL_TYPE) -> None: @staticmethod def _get_generations( - results: dict[str | bytes, str | bytes], + results: dict[str | bytes, str | bytes], ) -> Optional[List[Generation]]: generations = [] if results: @@ -431,7 +431,7 @@ def _get_generations( @staticmethod def _configure_pipeline_for_update( - key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None + key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None ) -> None: pipe.hset( key, @@ -570,7 +570,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: """Update cache based 
on prompt and llm_string. Async version.""" self._ensure_generation_type(return_val) @@ -615,7 +615,7 @@ class RedisSemanticCache(BaseCache): } def __init__( - self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 ): """Initialize by passing in the `init` GPTCache func @@ -735,10 +735,10 @@ class GPTCache(BaseCache): """Cache that uses GPTCache as a backend.""" def __init__( - self, - init_func: Union[ - Callable[[Any, str], None], Callable[[Any], None], None - ] = None, + self, + init_func: Union[ + Callable[[Any, str], None], Callable[[Any], None], None + ] = None, ): """Initialize by passing in init function (default: `None`). @@ -866,7 +866,7 @@ def _ensure_cache_exists(cache_client: momento.CacheClient, cache_name: str) -> create_cache_response = cache_client.create_cache(cache_name) if isinstance(create_cache_response, CreateCache.Success) or isinstance( - create_cache_response, CreateCache.CacheAlreadyExists + create_cache_response, CreateCache.CacheAlreadyExists ): return None elif isinstance(create_cache_response, CreateCache.Error): @@ -884,12 +884,12 @@ class MomentoCache(BaseCache): """Cache that uses Momento as a backend. See https://gomomento.com/""" def __init__( - self, - cache_client: momento.CacheClient, - cache_name: str, - *, - ttl: Optional[timedelta] = None, - ensure_cache_exists: bool = True, + self, + cache_client: momento.CacheClient, + cache_name: str, + *, + ttl: Optional[timedelta] = None, + ensure_cache_exists: bool = True, ): """Instantiate a prompt cache using Momento as a backend. 
@@ -928,14 +928,14 @@ def __init__( @classmethod def from_client_params( - cls, - cache_name: str, - ttl: timedelta, - *, - configuration: Optional[momento.config.Configuration] = None, - api_key: Optional[str] = None, - auth_token: Optional[str] = None, # for backwards compatibility - **kwargs: Any, + cls, + cache_name: str, + ttl: timedelta, + *, + configuration: Optional[momento.config.Configuration] = None, + api_key: Optional[str] = None, + auth_token: Optional[str] = None, # for backwards compatibility + **kwargs: Any, ) -> MomentoCache: """Construct cache from CacheClient parameters.""" try: @@ -1091,13 +1091,13 @@ class CassandraCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME, - ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME, + ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1165,7 +1165,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: blob = _dumps_generations(return_val) await self.kv_cache.aput( @@ -1175,7 +1175,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. 
@@ -1280,17 +1280,17 @@ class CassandraSemanticCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - embedding: Optional[Embeddings] = None, - table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, - distance_metric: Optional[str] = None, - score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, - ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + embedding: Optional[Embeddings] = None, + table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, + distance_metric: Optional[str] = None, + score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, + ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, + setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1394,7 +1394,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: embedding_vector = await self._aget_embedding(text=prompt) llm_string_hash = _hash(llm_string) @@ -1427,7 +1427,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. 
@@ -1458,7 +1458,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -1489,7 +1489,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -1498,7 +1498,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -1549,7 +1549,7 @@ class SQLAlchemyMd5Cache(BaseCache): """Cache that uses SQAlchemy as a backend.""" def __init__( - self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache + self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache ): """Initialize by creating all tables.""" self.engine = engine @@ -1627,16 +1627,16 @@ def _make_id(prompt: str, llm_string: str) -> str: return f"{_hash(prompt)}#{_hash(llm_string)}" def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - pre_delete_collection: bool = False, - setup_mode: AstraSetupMode = AstraSetupMode.SYNC, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: 
Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + pre_delete_collection: bool = False, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, ): """ Cache that uses Astra DB as a backend. @@ -1717,7 +1717,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1730,7 +1730,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. @@ -1744,7 +1744,7 @@ def delete_through_llm( return self.delete(prompt, llm_string=llm_string) async def adelete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `adelete` with the LLM being passed. 
@@ -1784,7 +1784,6 @@ async def aclear(self, **kwargs: Any) -> None: ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME = "langchain_astradb_semantic_cache" ASTRA_DB_SEMANTIC_CACHE_EMBEDDING_CACHE_SIZE = 16 - _unset = ["unset"] @@ -1828,19 +1827,19 @@ def decorating_function(user_function: Callable) -> Callable: ) class AstraDBSemanticCache(BaseCache): def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - setup_mode: AstraSetupMode = AstraSetupMode.SYNC, - pre_delete_collection: bool = False, - embedding: Embeddings, - metric: Optional[str] = None, - similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, + pre_delete_collection: bool = False, + embedding: Embeddings, + metric: Optional[str] = None, + similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, ): """ Cache that uses Astra DB as a vector-store backend for semantic @@ -1946,7 +1945,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1978,7 +1977,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str 
) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2008,7 +2007,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2038,7 +2037,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -2047,7 +2046,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -2091,22 +2090,22 @@ class AzureCosmosDBSemanticCache(BaseCache): DEFAULT_COLLECTION_NAME = "CosmosMongoVCoreCacheColl" def __init__( - self, - cosmosdb_connection_string: str, - database_name: str, - collection_name: str, - embedding: Embeddings, - *, - cosmosdb_client: Optional[Any] = None, - num_lists: int = 100, - similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, - kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, - dimensions: int = 1536, - m: int = 16, - ef_construction: int = 64, - ef_search: int = 40, - score_threshold: Optional[float] = None, - application_name: str = "LANGCHAIN_CACHING_PYTHON", + self, + cosmosdb_connection_string: str, + database_name: str, + collection_name: str, + embedding: Embeddings, + *, + cosmosdb_client: Optional[Any] = None, + num_lists: int = 100, + similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, + kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, + 
dimensions: int = 1536, + m: int = 16, + ef_construction: int = 64, + ef_search: int = 40, + score_threshold: Optional[float] = None, + application_name: str = "LANGCHAIN_CACHING_PYTHON", ): """ Args: @@ -2283,16 +2282,16 @@ class AzureCosmosDBNoSqlSemanticCache(BaseCache): """Cache that uses Cosmos DB NoSQL backend""" def __init__( - self, - embedding: Embeddings, - cosmos_client: CosmosClient, - database_name: str = "CosmosNoSqlCacheDB", - container_name: str = "CosmosNoSqlCacheContainer", - *, - vector_embedding_policy: Dict[str, Any], - indexing_policy: Dict[str, Any], - cosmos_container_properties: Dict[str, Any], - cosmos_database_properties: Dict[str, Any], + self, + embedding: Embeddings, + cosmos_client: CosmosClient, + database_name: str = "CosmosNoSqlCacheDB", + container_name: str = "CosmosNoSqlCacheContainer", + *, + vector_embedding_policy: Dict[str, Any], + indexing_policy: Dict[str, Any], + cosmos_container_properties: Dict[str, Any], + cosmos_database_properties: Dict[str, Any], ): self.cosmos_client = cosmos_client self.database_name = database_name @@ -2315,8 +2314,6 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: if cache_name in self._cache_dict: return self._cache_dict[cache_name] - - # create new vectorstore client to create the cache if self.cosmos_client: self._cache_dict[cache_name] = AzureCosmosDBNoSqlVectorSearch( @@ -2384,7 +2381,7 @@ class OpenSearchSemanticCache(BaseCache): """Cache that uses OpenSearch vector store backend""" def __init__( - self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2 ): """ Args: @@ -2486,12 +2483,12 @@ class SingleStoreDBSemanticCache(BaseCache): """Cache that uses SingleStore DB as a backend""" def __init__( - self, - embedding: Embeddings, - *, - cache_table_prefix: str = "cache_", - search_threshold: float = 0.2, - **kwargs: Any, + self, + embedding: Embeddings, + *, 
+ cache_table_prefix: str = "cache_", + search_threshold: float = 0.2, + **kwargs: Any, ): """Initialize with necessary components. @@ -2666,12 +2663,12 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: if results: for document_score in results: if ( - document_score[1] > self.search_threshold - and llm_cache.distance_strategy == DistanceStrategy.DOT_PRODUCT + document_score[1] > self.search_threshold + and llm_cache.distance_strategy == DistanceStrategy.DOT_PRODUCT ) or ( - document_score[1] < self.search_threshold - and llm_cache.distance_strategy - == DistanceStrategy.EUCLIDEAN_DISTANCE + document_score[1] < self.search_threshold + and llm_cache.distance_strategy + == DistanceStrategy.EUCLIDEAN_DISTANCE ): generations.extend(loads(document_score[0].metadata["return_val"])) return generations if generations else None From 98d9d7ebd45945e8dae810ef2d070efd8f9a4549 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 27 Aug 2024 15:29:23 -0700 Subject: [PATCH 11/44] linting --- libs/community/langchain_community/cache.py | 241 ++++++++++---------- 1 file changed, 122 insertions(+), 119 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 349945d95cce5..65b29db238661 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -224,7 +224,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return self.lookup(prompt, llm_string) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: """Update cache based on prompt and llm_string.""" self.update(prompt, llm_string, return_val) @@ -410,7 +410,7 @@ def _ensure_generation_type(return_val: RETURN_VAL_TYPE) -> None: @staticmethod def _get_generations( - results: dict[str | bytes, str | bytes], + results: dict[str | bytes, str | bytes], 
) -> Optional[List[Generation]]: generations = [] if results: @@ -431,7 +431,7 @@ def _get_generations( @staticmethod def _configure_pipeline_for_update( - key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None + key: str, pipe: Any, return_val: RETURN_VAL_TYPE, ttl: Optional[int] = None ) -> None: pipe.hset( key, @@ -570,7 +570,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: """Update cache based on prompt and llm_string. Async version.""" self._ensure_generation_type(return_val) @@ -615,7 +615,7 @@ class RedisSemanticCache(BaseCache): } def __init__( - self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2 ): """Initialize by passing in the `init` GPTCache func @@ -735,10 +735,10 @@ class GPTCache(BaseCache): """Cache that uses GPTCache as a backend.""" def __init__( - self, - init_func: Union[ - Callable[[Any, str], None], Callable[[Any], None], None - ] = None, + self, + init_func: Union[ + Callable[[Any, str], None], Callable[[Any], None], None + ] = None, ): """Initialize by passing in init function (default: `None`). @@ -866,7 +866,7 @@ def _ensure_cache_exists(cache_client: momento.CacheClient, cache_name: str) -> create_cache_response = cache_client.create_cache(cache_name) if isinstance(create_cache_response, CreateCache.Success) or isinstance( - create_cache_response, CreateCache.CacheAlreadyExists + create_cache_response, CreateCache.CacheAlreadyExists ): return None elif isinstance(create_cache_response, CreateCache.Error): @@ -884,12 +884,12 @@ class MomentoCache(BaseCache): """Cache that uses Momento as a backend. 
See https://gomomento.com/""" def __init__( - self, - cache_client: momento.CacheClient, - cache_name: str, - *, - ttl: Optional[timedelta] = None, - ensure_cache_exists: bool = True, + self, + cache_client: momento.CacheClient, + cache_name: str, + *, + ttl: Optional[timedelta] = None, + ensure_cache_exists: bool = True, ): """Instantiate a prompt cache using Momento as a backend. @@ -928,14 +928,14 @@ def __init__( @classmethod def from_client_params( - cls, - cache_name: str, - ttl: timedelta, - *, - configuration: Optional[momento.config.Configuration] = None, - api_key: Optional[str] = None, - auth_token: Optional[str] = None, # for backwards compatibility - **kwargs: Any, + cls, + cache_name: str, + ttl: timedelta, + *, + configuration: Optional[momento.config.Configuration] = None, + api_key: Optional[str] = None, + auth_token: Optional[str] = None, # for backwards compatibility + **kwargs: Any, ) -> MomentoCache: """Construct cache from CacheClient parameters.""" try: @@ -1091,13 +1091,13 @@ class CassandraCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME, - ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME, + ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1165,7 +1165,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: blob = 
_dumps_generations(return_val) await self.kv_cache.aput( @@ -1175,7 +1175,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. @@ -1280,17 +1280,17 @@ class CassandraSemanticCache(BaseCache): """ def __init__( - self, - session: Optional[CassandraSession] = None, - keyspace: Optional[str] = None, - embedding: Optional[Embeddings] = None, - table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, - distance_metric: Optional[str] = None, - score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, - ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, - skip_provisioning: bool = False, - similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, - setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, + self, + session: Optional[CassandraSession] = None, + keyspace: Optional[str] = None, + embedding: Optional[Embeddings] = None, + table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME, + distance_metric: Optional[str] = None, + score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD, + ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS, + skip_provisioning: bool = False, + similarity_measure: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC, + setup_mode: CassandraSetupMode = CassandraSetupMode.SYNC, ): if skip_provisioning: warn_deprecated( @@ -1394,7 +1394,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: embedding_vector = await self._aget_embedding(text=prompt) llm_string_hash = _hash(llm_string) @@ -1427,7 +1427,7 @@ async def alookup(self, prompt: str, 
llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -1458,7 +1458,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -1489,7 +1489,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -1498,7 +1498,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -1549,7 +1549,7 @@ class SQLAlchemyMd5Cache(BaseCache): """Cache that uses SQAlchemy as a backend.""" def __init__( - self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache + self, engine: Engine, cache_schema: Type[FullMd5LLMCache] = FullMd5LLMCache ): """Initialize by creating all tables.""" self.engine = engine @@ -1627,16 +1627,16 @@ def _make_id(prompt: str, llm_string: str) -> str: return f"{_hash(prompt)}#{_hash(llm_string)}" def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - pre_delete_collection: bool = False, - setup_mode: AstraSetupMode = 
AstraSetupMode.SYNC, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + pre_delete_collection: bool = False, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, ): """ Cache that uses Astra DB as a backend. @@ -1717,7 +1717,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1730,7 +1730,7 @@ async def aupdate( ) def delete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `delete` with the LLM being passed. @@ -1744,7 +1744,7 @@ def delete_through_llm( return self.delete(prompt, llm_string=llm_string) async def adelete_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> None: """ A wrapper around `adelete` with the LLM being passed. 
@@ -1784,6 +1784,7 @@ async def aclear(self, **kwargs: Any) -> None: ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME = "langchain_astradb_semantic_cache" ASTRA_DB_SEMANTIC_CACHE_EMBEDDING_CACHE_SIZE = 16 + _unset = ["unset"] @@ -1827,19 +1828,19 @@ def decorating_function(user_function: Callable) -> Callable: ) class AstraDBSemanticCache(BaseCache): def __init__( - self, - *, - collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, - token: Optional[str] = None, - api_endpoint: Optional[str] = None, - astra_db_client: Optional[AstraDB] = None, - async_astra_db_client: Optional[AsyncAstraDB] = None, - namespace: Optional[str] = None, - setup_mode: AstraSetupMode = AstraSetupMode.SYNC, - pre_delete_collection: bool = False, - embedding: Embeddings, - metric: Optional[str] = None, - similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, + self, + *, + collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDB] = None, + async_astra_db_client: Optional[AsyncAstraDB] = None, + namespace: Optional[str] = None, + setup_mode: AstraSetupMode = AstraSetupMode.SYNC, + pre_delete_collection: bool = False, + embedding: Embeddings, + metric: Optional[str] = None, + similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, ): """ Cache that uses Astra DB as a vector-store backend for semantic @@ -1945,7 +1946,7 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N ) async def aupdate( - self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE + self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) @@ -1977,7 +1978,7 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return None def lookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str 
) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2007,7 +2008,7 @@ def lookup_with_id( return None async def alookup_with_id( - self, prompt: str, llm_string: str + self, prompt: str, llm_string: str ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: """ Look up based on prompt and llm_string. @@ -2037,7 +2038,7 @@ async def alookup_with_id( return None def lookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = get_prompts( {**llm.dict(), **{"stop": stop}}, @@ -2046,7 +2047,7 @@ def lookup_with_id_through_llm( return self.lookup_with_id(prompt, llm_string=llm_string) async def alookup_with_id_through_llm( - self, prompt: str, llm: LLM, stop: Optional[List[str]] = None + self, prompt: str, llm: LLM, stop: Optional[List[str]] = None ) -> Optional[Tuple[str, RETURN_VAL_TYPE]]: llm_string = ( await aget_prompts( @@ -2090,22 +2091,22 @@ class AzureCosmosDBSemanticCache(BaseCache): DEFAULT_COLLECTION_NAME = "CosmosMongoVCoreCacheColl" def __init__( - self, - cosmosdb_connection_string: str, - database_name: str, - collection_name: str, - embedding: Embeddings, - *, - cosmosdb_client: Optional[Any] = None, - num_lists: int = 100, - similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, - kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, - dimensions: int = 1536, - m: int = 16, - ef_construction: int = 64, - ef_search: int = 40, - score_threshold: Optional[float] = None, - application_name: str = "LANGCHAIN_CACHING_PYTHON", + self, + cosmosdb_connection_string: str, + database_name: str, + collection_name: str, + embedding: Embeddings, + *, + cosmosdb_client: Optional[Any] = None, + num_lists: int = 100, + similarity: CosmosDBSimilarityType = CosmosDBSimilarityType.COS, + kind: CosmosDBVectorSearchType = CosmosDBVectorSearchType.VECTOR_IVF, + 
dimensions: int = 1536, + m: int = 16, + ef_construction: int = 64, + ef_search: int = 40, + score_threshold: Optional[float] = None, + application_name: str = "LANGCHAIN_CACHING_PYTHON", ): """ Args: @@ -2282,16 +2283,16 @@ class AzureCosmosDBNoSqlSemanticCache(BaseCache): """Cache that uses Cosmos DB NoSQL backend""" def __init__( - self, - embedding: Embeddings, - cosmos_client: CosmosClient, - database_name: str = "CosmosNoSqlCacheDB", - container_name: str = "CosmosNoSqlCacheContainer", - *, - vector_embedding_policy: Dict[str, Any], - indexing_policy: Dict[str, Any], - cosmos_container_properties: Dict[str, Any], - cosmos_database_properties: Dict[str, Any], + self, + embedding: Embeddings, + cosmos_client: CosmosClient, + database_name: str = "CosmosNoSqlCacheDB", + container_name: str = "CosmosNoSqlCacheContainer", + *, + vector_embedding_policy: Dict[str, Any], + indexing_policy: Dict[str, Any], + cosmos_container_properties: Dict[str, Any], + cosmos_database_properties: Dict[str, Any], ): self.cosmos_client = cosmos_client self.database_name = database_name @@ -2314,6 +2315,8 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: if cache_name in self._cache_dict: return self._cache_dict[cache_name] + + # create new vectorstore client to create the cache if self.cosmos_client: self._cache_dict[cache_name] = AzureCosmosDBNoSqlVectorSearch( @@ -2381,7 +2384,7 @@ class OpenSearchSemanticCache(BaseCache): """Cache that uses OpenSearch vector store backend""" def __init__( - self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2 + self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2 ): """ Args: @@ -2483,12 +2486,12 @@ class SingleStoreDBSemanticCache(BaseCache): """Cache that uses SingleStore DB as a backend""" def __init__( - self, - embedding: Embeddings, - *, - cache_table_prefix: str = "cache_", - search_threshold: float = 0.2, - **kwargs: Any, + self, + embedding: Embeddings, + *, 
+ cache_table_prefix: str = "cache_", + search_threshold: float = 0.2, + **kwargs: Any, ): """Initialize with necessary components. @@ -2663,12 +2666,12 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: if results: for document_score in results: if ( - document_score[1] > self.search_threshold - and llm_cache.distance_strategy == DistanceStrategy.DOT_PRODUCT + document_score[1] > self.search_threshold + and llm_cache.distance_strategy == DistanceStrategy.DOT_PRODUCT ) or ( - document_score[1] < self.search_threshold - and llm_cache.distance_strategy - == DistanceStrategy.EUCLIDEAN_DISTANCE + document_score[1] < self.search_threshold + and llm_cache.distance_strategy + == DistanceStrategy.EUCLIDEAN_DISTANCE ): generations.extend(loads(document_score[0].metadata["return_val"])) return generations if generations else None From 2c4d2d3763b9de3aa175aab007ed8a4c2fd90c3d Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 27 Aug 2024 15:31:19 -0700 Subject: [PATCH 12/44] linting --- libs/community/langchain_community/cache.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 65b29db238661..656bf7c2cb06e 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -2315,8 +2315,6 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: if cache_name in self._cache_dict: return self._cache_dict[cache_name] - - # create new vectorstore client to create the cache if self.cosmos_client: self._cache_dict[cache_name] = AzureCosmosDBNoSqlVectorSearch( From 2c4f2816444fd33669d6e1f8bd6ea68424dfe8b7 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 11:26:21 -0700 Subject: [PATCH 13/44] Linting --- .../cache/test_azure_cosmosdbnosql_cache.py | 5 ----- 1 file changed, 5 deletions(-) diff --git 
a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 2f8dfd4a45cfd..bd98a9e5925ba 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -1,9 +1,5 @@ """Test Azure CosmosDB NoSql cache functionality.""" -import os -import uuid - -import pytest from azure.cosmos import CosmosClient, PartitionKey from langchain.globals import get_llm_cache, set_llm_cache from langchain_core.outputs import Generation @@ -13,7 +9,6 @@ from libs.community.tests.unit_tests.llms.fake_llm import FakeLLM from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache -from langchain_community.vectorstores import AzureCosmosDBNoSqlVectorSearch URI = "COSMOS_DB_URI" KEY = "COSMOS_DB_KEY" From 0caa7cd12009b3d9479647262e08e3dc6c5f0d38 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 11:33:35 -0700 Subject: [PATCH 14/44] Linting --- .../cache/test_azure_cosmosdbnosql_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index bd98a9e5925ba..4af653a279d8c 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -16,7 +16,7 @@ # cosine, euclidean, innerproduct -def indexing_policy(index_type: str): +def indexing_policy(index_type: str) -> dict: return { "indexingMode": "consistent", "includedPaths": [{"path": "/*"}], @@ -25,7 +25,7 @@ def indexing_policy(index_type: str): } -def vector_embedding_policy(distance_function: str): +def vector_embedding_policy(distance_function: str) -> dict: return { "vectorEmbeddings": [ { @@ -40,7 
+40,7 @@ def vector_embedding_policy(distance_function: str): partition_key = PartitionKey(path="/id") cosmos_container_properties_test = {"partition_key": partition_key} -cosmos_database_properties_test = {} +cosmos_database_properties_test: Dict[str, str] = {} def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: From 88919768a86efa2e44c7fdb86a2610537ed8c51f Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 11:37:07 -0700 Subject: [PATCH 15/44] Linting --- .../integration_tests/cache/test_azure_cosmosdbnosql_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 4af653a279d8c..8a30c035945db 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -1,5 +1,6 @@ """Test Azure CosmosDB NoSql cache functionality.""" +from typing import Dict, Any from azure.cosmos import CosmosClient, PartitionKey from langchain.globals import get_llm_cache, set_llm_cache from langchain_core.outputs import Generation @@ -40,7 +41,7 @@ def vector_embedding_policy(distance_function: str) -> dict: partition_key = PartitionKey(path="/id") cosmos_container_properties_test = {"partition_key": partition_key} -cosmos_database_properties_test: Dict[str, str] = {} +cosmos_database_properties_test: Dict[str, Any] = {} def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: From 9ffa53f125b83b535fc72173d3ad50e85ff78ec5 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 11:46:36 -0700 Subject: [PATCH 16/44] Linting --- .../integration_tests/cache/test_azure_cosmosdbnosql_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 8a30c035945db..aa2284d753be2 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -1,6 +1,7 @@ """Test Azure CosmosDB NoSql cache functionality.""" -from typing import Dict, Any +from typing import Any, Dict + from azure.cosmos import CosmosClient, PartitionKey from langchain.globals import get_llm_cache, set_llm_cache from langchain_core.outputs import Generation From eb1f8bf8471ecda6a58d10364ae055f2e63f8f6e Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 11:46:57 -0700 Subject: [PATCH 17/44] Linting --- libs/community/langchain_community/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 4ad096ebca637..b0e5528108084 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -48,7 +48,6 @@ cast, ) -from azure.cosmos import CosmosClient from sqlalchemy import Column, Integer, String, create_engine, delete, select from sqlalchemy.engine import Row from sqlalchemy.engine.base import Engine @@ -97,6 +96,7 @@ import momento from astrapy.db import AstraDB, AsyncAstraDB from cassandra.cluster import Session as CassandraSession + from azure.cosmos.cosmos_client import CosmosClient def _hash(_input: str) -> str: From 3599624b066a7d723369a96619a0e3b81469cddc Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 11:50:06 -0700 Subject: [PATCH 18/44] Linting --- libs/community/langchain_community/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 
b0e5528108084..72479e560fea5 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -95,8 +95,8 @@ if TYPE_CHECKING: import momento from astrapy.db import AstraDB, AsyncAstraDB - from cassandra.cluster import Session as CassandraSession from azure.cosmos.cosmos_client import CosmosClient + from cassandra.cluster import Session as CassandraSession def _hash(_input: str) -> str: From f2f1ff57d721aabf0c9a40fcbc8e16d7495e3ca0 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 12:00:15 -0700 Subject: [PATCH 19/44] Linting --- .../cache/test_azure_cosmosdbnosql_cache.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index aa2284d753be2..464c8f70ba983 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -1,8 +1,9 @@ -"""Test Azure CosmosDB NoSql cache functionality.""" +"""Test` Azure CosmosDB NoSql cache functionality.""" from typing import Any, Dict from azure.cosmos import CosmosClient, PartitionKey +import pytest from langchain.globals import get_llm_cache, set_llm_cache from langchain_core.outputs import Generation from libs.community.tests.integration_tests.cache.fake_embeddings import ( @@ -14,9 +15,20 @@ URI = "COSMOS_DB_URI" KEY = "COSMOS_DB_KEY" -test_client = CosmosClient(URI, credential=KEY) +@pytest.fixture() +def cosmos_client() -> Any: + from azure.cosmos import CosmosClient + + return CosmosClient(HOST, KEY) + +@pytest.fixture() +def partition_key() -> Any: + from azure.cosmos import PartitionKey + + return PartitionKey(path="/id") + # cosine, euclidean, innerproduct def indexing_policy(index_type: str) -> dict: return { @@ -40,7 +52,6 @@ def 
vector_embedding_policy(distance_function: str) -> dict: } -partition_key = PartitionKey(path="/id") cosmos_container_properties_test = {"partition_key": partition_key} cosmos_database_properties_test: Dict[str, Any] = {} @@ -48,7 +59,7 @@ def vector_embedding_policy(distance_function: str) -> dict: def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, + cosmos_client=cosmos_client, embedding=FakeEmbeddings(), vector_embedding_policy=vector_embedding_policy("cosine"), indexing_policy=indexing_policy("quantizedFlat"), @@ -74,7 +85,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, + cosmos_client=cosmos_client, embedding=FakeEmbeddings(), vector_embedding_policy=vector_embedding_policy("cosine"), indexing_policy=indexing_policy("flat"), @@ -100,7 +111,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat() -> None: def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, + cosmos_client=cosmos_client, embedding=FakeEmbeddings(), vector_embedding_policy=vector_embedding_policy("dotProduct"), indexing_policy=indexing_policy("quantizedFlat"), @@ -128,7 +139,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat() -> None def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, + cosmos_client=cosmos_client, embedding=FakeEmbeddings(), vector_embedding_policy=vector_embedding_policy("dotProduct"), indexing_policy=indexing_policy("flat"), @@ -156,7 +167,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None: def 
test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, + cosmos_client=cosmos_client, embedding=FakeEmbeddings(), vector_embedding_policy=vector_embedding_policy("euclidean"), indexing_policy=indexing_policy("quantizedFlat"), @@ -182,7 +193,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None: def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat() -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( - cosmos_client=test_client, + cosmos_client=cosmos_client, embedding=FakeEmbeddings(), vector_embedding_policy=vector_embedding_policy("euclidean"), indexing_policy=indexing_policy("flat"), From 1e818dad9d417d04df6cc266f6cb5ce9f6287fdd Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 12:02:27 -0700 Subject: [PATCH 20/44] Linting --- .../integration_tests/cache/test_azure_cosmosdbnosql_cache.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 464c8f70ba983..51a0e1cf85f0c 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -2,7 +2,6 @@ from typing import Any, Dict -from azure.cosmos import CosmosClient, PartitionKey import pytest from langchain.globals import get_llm_cache, set_llm_cache from langchain_core.outputs import Generation @@ -13,7 +12,7 @@ from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache -URI = "COSMOS_DB_URI" +HOST = "COSMOS_DB_URI" KEY = "COSMOS_DB_KEY" From 0441131a0939cefa1ceda8e11de81433bf2d99ea Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 12:05:51 -0700 Subject: [PATCH 21/44] Linting --- .../cache/test_azure_cosmosdbnosql_cache.py | 8 ++++---- 1 
file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 51a0e1cf85f0c..6536fcb41f7d7 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -5,12 +5,12 @@ import pytest from langchain.globals import get_llm_cache, set_llm_cache from langchain_core.outputs import Generation -from libs.community.tests.integration_tests.cache.fake_embeddings import ( - FakeEmbeddings, -) -from libs.community.tests.unit_tests.llms.fake_llm import FakeLLM from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache +from tests.integration_tests.cache.fake_embeddings import ( + FakeEmbeddings, +) +from tests.unit_tests.llms.fake_llm import FakeLLM HOST = "COSMOS_DB_URI" KEY = "COSMOS_DB_KEY" From c3d09171a84fb1c45954b8392877fc57e7c90309 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 3 Sep 2024 12:24:32 -0700 Subject: [PATCH 22/44] Linting --- .../integration_tests/cache/test_azure_cosmosdbnosql_cache.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 6536fcb41f7d7..2156418ebfb35 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -22,12 +22,14 @@ def cosmos_client() -> Any: return CosmosClient(HOST, KEY) + @pytest.fixture() def partition_key() -> Any: from azure.cosmos import PartitionKey return PartitionKey(path="/id") + # cosine, euclidean, innerproduct def indexing_policy(index_type: str) -> dict: return { From 860b0c06ab59c950f6a4bfa02182ecb38453c6b2 Mon Sep 17 00:00:00 2001 From: 
Aayush Kataria Date: Wed, 25 Sep 2024 16:16:37 -0700 Subject: [PATCH 23/44] Adding notebook sample --- docs/docs/integrations/llm_caching.ipynb | 90 ++++++++++++++++++- libs/community/langchain_community/cache.py | 10 ++- .../vectorstores/azure_cosmos_db.py | 5 +- .../vectorstores/azure_cosmos_db_no_sql.py | 23 +++-- .../cache/test_azure_cosmosdbnosql_cache.py | 17 ++-- .../test_azure_cosmos_db_no_sql.py | 66 ++++++-------- 6 files changed, 142 insertions(+), 69 deletions(-) diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index c344126d115a2..5a1ce0973ef3d 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -1854,6 +1854,94 @@ "llm.invoke(\"Tell me a joke\")" ] }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Azure CosmosDB NoSql Semantic Cache\n", + "\n", + "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching." 
+ ], + "id": "235ff73bf7143f13" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "from typing import Any, Dict\n", + "from azure.cosmos import CosmosClient, PartitionKey\n", + "from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "\n", + "HOST = \"COSMOS_DB_URI\"\n", + "KEY = \"COSMOS_DB_KEY\"\n", + "\n", + "cosmos_client = CosmosClient(HOST, KEY)\n", + "\n", + "def get_vector_indexing_policy() -> dict:\n", + " return {\n", + " \"indexingMode\": \"consistent\",\n", + " \"includedPaths\": [{\"path\": \"/*\"}],\n", + " \"excludedPaths\": [{\"path\": '/\"_etag\"/?'}],\n", + " \"vectorIndexes\": [{\"path\": \"/embedding\", \"type\": \"quantized_flat\"}],\n", + " }\n", + "\n", + "\n", + "def get_vector_embedding_policy() -> dict:\n", + " return {\n", + " \"vectorEmbeddings\": [\n", + " {\n", + " \"path\": \"/embedding\",\n", + " \"dataType\": \"float32\",\n", + " \"dimensions\": 1536,\n", + " \"distanceFunction\": \"cosine\",\n", + " }\n", + " ]\n", + " }\n", + "\n", + "cosmos_container_properties_test = {\"partition_key\": PartitionKey(path=\"/id\")}\n", + "cosmos_database_properties_test: Dict[str, Any] = {}\n", + " \n", + "set_llm_cache( \n", + " AzureCosmosDBNoSqlSemanticCache(\n", + " cosmos_client=cosmos_client,\n", + " embedding=OpenAIEmbeddings(),\n", + " vector_embedding_policy=get_vector_indexing_policy(),\n", + " indexing_policy=get_vector_embedding_policy(),\n", + " cosmos_container_properties=cosmos_container_properties_test,\n", + " cosmos_database_properties=cosmos_database_properties_test,\n", + " )\n", + ")" + ], + "id": "41fea5aa7b2153ca" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm.invoke(\"Tell me a joke\")" + ], + "id": "1e1cd93819921bf6" + }, + { + 
"metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm.invoke(\"Tell me a joke\")" + ], + "id": "576ce24c1244812a" + }, { "cell_type": "markdown", "id": "306ff47b", @@ -2868,4 +2956,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 72479e560fea5..17dee63b29477 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -2106,7 +2106,7 @@ def __init__( ef_construction: int = 64, ef_search: int = 40, score_threshold: Optional[float] = None, - application_name: str = "LANGCHAIN_CACHING_PYTHON", + application_name: str = "LangChain-CDBNoSQL-SemanticCache-Python", ): """ Args: @@ -2271,7 +2271,6 @@ def clear(self, **kwargs: Any) -> None: index_name = self._index_name(kwargs["llm_string"]) if index_name in self._cache_dict: self._cache_dict[index_name].get_collection().delete_many({}) - # self._cache_dict[index_name].clear_collection() @staticmethod def _validate_enum_value(value: Any, enum_type: Type[Enum]) -> None: @@ -2374,8 +2373,11 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N def clear(self, **kwargs: Any) -> None: """Clear semantic cache for a given llm_string.""" - database = self.cosmos_client.get_database_client(self.database_name) - database.delete_container(self.container_name) + cache_name = self._cache_name(llm_string=kwargs["llm-string"]) + if cache_name in self._cache_dict: + container = self._cache_dict["cache_name"].get_container() + for item in container.read_all_items(): + container.delete_item(item) class OpenSearchSemanticCache(BaseCache): diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db.py index 
92a450bd86f85..87789c4981433 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db.py @@ -80,7 +80,7 @@ def __init__( index_name: str = "vectorSearchIndex", text_key: str = "textContent", embedding_key: str = "vectorContent", - application_name: str = "LANGCHAIN_PYTHON", + application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python", ): """Constructor for AzureCosmosDBVectorSearch @@ -119,7 +119,7 @@ def from_connection_string( connection_string: str, namespace: str, embedding: Embeddings, - application_name: str = "LANGCHAIN_PYTHON", + application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python", **kwargs: Any, ) -> AzureCosmosDBVectorSearch: """Creates an Instance of AzureCosmosDBVectorSearch @@ -129,6 +129,7 @@ def from_connection_string( connection_string: The MongoDB vCore instance connection string namespace: The namespace (database.collection) embedding: The embedding utility + application_name: **kwargs: Dynamic keyword arguments Returns: diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 8a671da921ce6..ebc9b4be1efbe 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -118,10 +118,6 @@ def __init__( vector_embedding_policy=self._vector_embedding_policy, ) - self._embedding_key = self._vector_embedding_policy["vectorEmbeddings"][0][ - "path" - ][1:] - def add_texts( self, texts: Iterable[str], @@ -165,7 +161,7 @@ def _insert_texts( { "id": str(uuid.uuid4()), text_key: t, - self._embedding_key: embedding, + "embedding": embedding, "metadata": m, } for t, m, embedding in zip(texts, metadatas, embeddings) @@ -274,22 +270,21 @@ def _similarity_search_with_score( query += "TOP @limit " query += ( - "c.id, c.{}, c.text, 
c.metadata, " - "VectorDistance(c.@embeddingKey, @embeddings) AS SimilarityScore FROM c" + "c.id, c.text, c.metadata, c.embedding, " + "VectorDistance(c.embedding, @embeddings) AS SimilarityScore FROM c" ) # Add where_clause if specified if pre_filter is not None and pre_filter.get("where_clause") is not None: query += " {}".format(pre_filter["where_clause"]) - query += " ORDER BY VectorDistance(c.@embeddingKey, @embeddings)" + query += " ORDER BY VectorDistance(c.embedding, @embeddings)" # Add limit_offset_clause if specified if pre_filter is not None and pre_filter.get("limit_offset_clause") is not None: query += " {}".format(pre_filter["limit_offset_clause"]) parameters = [ {"name": "@limit", "value": k}, - {"name": "@embeddingKey", "value": self._embedding_key}, {"name": "@embeddings", "value": embeddings}, ] @@ -297,15 +292,16 @@ def _similarity_search_with_score( items = list( self._container.query_items( - query=query, parameters=parameters, enable_cross_partition_query=True + query=query, parameters=parameters, enable_cross_partition_query=True, ) ) for item in items: text = item["text"] metadata = item["metadata"] + metadata["id"] = item["id"] score = item["SimilarityScore"] if with_embedding: - metadata[self._embedding_key] = item[self._embedding_key] + metadata["embedding"] = item["embedding"] docs_and_scores.append( (Document(page_content=text, metadata=metadata), score) ) @@ -369,7 +365,7 @@ def max_marginal_relevance_search_by_vector( # Re-ranks the docs using MMR mmr_doc_indexes = maximal_marginal_relevance( np.array(embedding), - [doc.metadata[self._embedding_key] for doc, _ in docs], + [doc.metadata["embedding"] for doc, _ in docs], k=k, lambda_mult=lambda_mult, ) @@ -403,3 +399,6 @@ def max_marginal_relevance_search( with_embedding=with_embedding, ) return docs + + def get_container(self): + return self._container diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py 
b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 2156418ebfb35..c5c28887b8484 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -7,9 +7,8 @@ from langchain_core.outputs import Generation from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache -from tests.integration_tests.cache.fake_embeddings import ( - FakeEmbeddings, -) + +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings from tests.unit_tests.llms.fake_llm import FakeLLM HOST = "COSMOS_DB_URI" @@ -57,7 +56,7 @@ def vector_embedding_policy(distance_function: str) -> dict: cosmos_database_properties_test: Dict[str, Any] = {} -def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat(cosmos_client) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -83,7 +82,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None: get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(cosmos_client) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -109,7 +108,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat() -> None: get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat(cosmos_client) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -137,7 +136,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat() -> None get_llm_cache().clear(llm_string=llm_string) -def 
test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(cosmos_client) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -165,7 +164,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None: get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat(cosmos_client) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -191,7 +190,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None: get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat() -> None: +def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat(cosmos_client) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, diff --git a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py index c8a8f87a599a4..d0285b6e42298 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py +++ b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py @@ -8,10 +8,10 @@ import pytest from langchain_core.documents import Document -from langchain_community.embeddings import OpenAIEmbeddings from langchain_community.vectorstores.azure_cosmos_db_no_sql import ( AzureCosmosDBNoSqlVectorSearch, ) +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings logging.basicConfig(level=logging.DEBUG) @@ -42,14 +42,6 @@ def partition_key() -> Any: return PartitionKey(path="/id") -@pytest.fixture() -def azure_openai_embeddings() -> Any: - openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings( - deployment=model_deployment, 
model=model_name, chunk_size=1 - ) - return openai_embeddings - - def safe_delete_database(cosmos_client: Any) -> None: cosmos_client.delete_database(database_name) @@ -83,19 +75,17 @@ def test_from_documents_cosine_distance( self, cosmos_client: Any, partition_key: Any, - azure_openai_embeddings: OpenAIEmbeddings, ) -> None: """Test end to end construction and search.""" documents = [ - Document(page_content="Dogs are tough.", metadata={"a": 1}), - Document(page_content="Cats have fluff.", metadata={"b": 1}), - Document(page_content="What is a sandwich?", metadata={"c": 1}), - Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}), + Document(page_content="foo", metadata={"a": 1}), + Document(page_content="bar", metadata={"b": 1}), + Document(page_content="baz", metadata={"c": 1}), ] store = AzureCosmosDBNoSqlVectorSearch.from_documents( documents, - azure_openai_embeddings, + FakeEmbeddings(), cosmos_client=cosmos_client, database_name=database_name, container_name=container_name, @@ -108,29 +98,25 @@ def test_from_documents_cosine_distance( ) sleep(1) # waits for Cosmos DB to save contents to the collection - output = store.similarity_search("Dogs", k=2) + output = store.similarity_search("foo", k=2) assert output - assert output[0].page_content == "Dogs are tough." 
+ assert output[0].page_content == "foo" safe_delete_database(cosmos_client) def test_from_texts_cosine_distance_delete_one( self, cosmos_client: Any, partition_key: Any, - azure_openai_embeddings: OpenAIEmbeddings, ) -> None: texts = [ - "Dogs are tough.", - "Cats have fluff.", - "What is a sandwich?", - "That fence is purple.", + "foo", "bar", "baz", ] - metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + metadatas = [{"a": 1}, {"b": 1}, {"d": 1, "e": 2}] store = AzureCosmosDBNoSqlVectorSearch.from_texts( texts, - azure_openai_embeddings, + FakeEmbeddings(), metadatas, cosmos_client=cosmos_client, database_name=database_name, @@ -144,36 +130,34 @@ def test_from_texts_cosine_distance_delete_one( ) sleep(1) # waits for Cosmos DB to save contents to the collection - output = store.similarity_search("Dogs", k=1) + output = store.similarity_search("foo", k=1) assert output - assert output[0].page_content == "Dogs are tough." + assert output[0].page_content == "foo" # delete one document store.delete_document_by_id(str(output[0].metadata["id"])) sleep(2) - output2 = store.similarity_search("Dogs", k=1) + output2 = store.similarity_search("foo", k=1) assert output2 - assert output2[0].page_content != "Dogs are tough." 
+ assert output2[0].page_content != "foo" safe_delete_database(cosmos_client) def test_from_documents_cosine_distance_with_filtering( self, cosmos_client: Any, partition_key: Any, - azure_openai_embeddings: OpenAIEmbeddings, ) -> None: """Test end to end construction and search.""" documents = [ - Document(page_content="Dogs are tough.", metadata={"a": 1}), - Document(page_content="Cats have fluff.", metadata={"a": 1}), - Document(page_content="What is a sandwich?", metadata={"c": 1}), - Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}), + Document(page_content="foo", metadata={"a": 1}), + Document(page_content="bar", metadata={"a": 1}), + Document(page_content="baz", metadata={"d": 1, "e": 2}), ] store = AzureCosmosDBNoSqlVectorSearch.from_documents( documents, - azure_openai_embeddings, + FakeEmbeddings(), cosmos_client=cosmos_client, database_name=database_name, container_name=container_name, @@ -186,20 +170,20 @@ def test_from_documents_cosine_distance_with_filtering( ) sleep(1) # waits for Cosmos DB to save contents to the collection - output = store.similarity_search("Dogs", k=4) - assert len(output) == 4 - assert output[0].page_content == "Dogs are tough." + output = store.similarity_search("foo", k=3) + assert len(output) == 3 + assert output[0].page_content == "foo" assert output[0].metadata["a"] == 1 pre_filter = { "where_clause": "WHERE c.metadata.a=1", } output = store.similarity_search( - "Dogs", k=4, pre_filter=pre_filter, with_embedding=True + "foo", k=3, pre_filter=pre_filter, with_embedding=True ) assert len(output) == 2 - assert output[0].page_content == "Dogs are tough." 
+ assert output[0].page_content == "foo" assert output[0].metadata["a"] == 1 pre_filter = { @@ -207,9 +191,9 @@ def test_from_documents_cosine_distance_with_filtering( "limit_offset_clause": "OFFSET 0 LIMIT 1", } - output = store.similarity_search("Dogs", k=4, pre_filter=pre_filter) + output = store.similarity_search("foo", k=3, pre_filter=pre_filter) assert len(output) == 1 - assert output[0].page_content == "Dogs are tough." + assert output[0].page_content == "foo" assert output[0].metadata["a"] == 1 safe_delete_database(cosmos_client) From 337836512dd2d3e41a9e1fc650194144725f5a3b Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 16:40:55 -0700 Subject: [PATCH 24/44] linting --- .../vectorstores/azure_cosmos_db_no_sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index ebc9b4be1efbe..93a655c58ffc5 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -292,7 +292,9 @@ def _similarity_search_with_score( items = list( self._container.query_items( - query=query, parameters=parameters, enable_cross_partition_query=True, + query=query, + parameters=parameters, + enable_cross_partition_query=True, ) ) for item in items: From fe08580cc4a97f4e9d84a6f321702e17f5347992 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 16:43:37 -0700 Subject: [PATCH 25/44] linting --- .../langchain_community/vectorstores/azure_cosmos_db_no_sql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 93a655c58ffc5..7454cebdd203b 100644 --- 
a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple import numpy as np +from azure.cosmos import ContainerProxy from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore @@ -402,5 +403,5 @@ def max_marginal_relevance_search( ) return docs - def get_container(self): + def get_container(self) -> ContainerProxy: return self._container From cb02b1a9b4da82d752632499506cc6ef36c357c9 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 16:49:08 -0700 Subject: [PATCH 26/44] linting --- .../vectorstores/azure_cosmos_db_no_sql.py | 2 +- .../cache/test_azure_cosmosdbnosql_cache.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 7454cebdd203b..8bd6b65b48850 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple import numpy as np -from azure.cosmos import ContainerProxy from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore @@ -14,6 +13,7 @@ if TYPE_CHECKING: from azure.cosmos.cosmos_client import CosmosClient + from azure.cosmos import ContainerProxy class AzureCosmosDBNoSqlVectorSearch(VectorStore): diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 
c5c28887b8484..be34ed76c0581 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -7,7 +7,6 @@ from langchain_core.outputs import Generation from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache - from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings from tests.unit_tests.llms.fake_llm import FakeLLM @@ -56,7 +55,9 @@ def vector_embedding_policy(distance_function: str) -> dict: cosmos_database_properties_test: Dict[str, Any] = {} -def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat(cosmos_client) -> None: +def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat( + cosmos_client +) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -108,7 +109,9 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(cosmos_client) -> None get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat(cosmos_client) -> None: +def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat( + cosmos_client +) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -164,7 +167,9 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(cosmos_client) -> get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat(cosmos_client) -> None: +def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat( + cosmos_client +) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, From bc8ee2d028b5f3c52a2db14e98f6ec37470a8cef Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 16:55:09 -0700 Subject: [PATCH 27/44] linting --- .../vectorstores/azure_cosmos_db_no_sql.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 8bd6b65b48850..fb3b431397364 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -2,15 +2,14 @@ import uuid import warnings -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING import numpy as np +from langchain_community.vectorstores.utils import maximal_marginal_relevance from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore -from langchain_community.vectorstores.utils import maximal_marginal_relevance - if TYPE_CHECKING: from azure.cosmos.cosmos_client import CosmosClient from azure.cosmos import ContainerProxy From a5eebd979d718aacf0591b28916bfe6eca1eae55 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 16:57:51 -0700 Subject: [PATCH 28/44] linting --- .../langchain_community/vectorstores/azure_cosmos_db_no_sql.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index fb3b431397364..24de9e121b321 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -11,8 +11,7 @@ from langchain_core.vectorstores import VectorStore if TYPE_CHECKING: - from azure.cosmos.cosmos_client import CosmosClient - from azure.cosmos import ContainerProxy + from azure.cosmos import ContainerProxy, CosmosClient class AzureCosmosDBNoSqlVectorSearch(VectorStore): From 5f2c91feb9ec23685555f602b9f8439b3cc4820b Mon Sep 17 00:00:00 
2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 17:24:08 -0700 Subject: [PATCH 29/44] linting --- .../langchain_community/vectorstores/azure_cosmos_db_no_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 24de9e121b321..275211b17abe9 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -2,7 +2,7 @@ import uuid import warnings -from typing import Any, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple import numpy as np from langchain_community.vectorstores.utils import maximal_marginal_relevance From 0441fa7c0c62c282cad32ddb972e9f5cbebe3397 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 17:36:06 -0700 Subject: [PATCH 30/44] linting --- .../langchain_community/vectorstores/azure_cosmos_db_no_sql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 275211b17abe9..010c92d8df59b 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -5,11 +5,12 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple import numpy as np -from langchain_community.vectorstores.utils import maximal_marginal_relevance from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore +from langchain_community.vectorstores.utils import maximal_marginal_relevance + if TYPE_CHECKING: from azure.cosmos import 
ContainerProxy, CosmosClient From 05fd4382966e88c14301377617a94d56bd6ec34c Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 17:41:21 -0700 Subject: [PATCH 31/44] linting --- .../cache/test_azure_cosmosdbnosql_cache.py | 6 +++--- .../vectorstores/test_azure_cosmos_db_no_sql.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index be34ed76c0581..cf3e594e577b9 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -56,7 +56,7 @@ def vector_embedding_policy(distance_function: str) -> dict: def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat( - cosmos_client + cosmos_client, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -110,7 +110,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(cosmos_client) -> None def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat( - cosmos_client + cosmos_client, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -168,7 +168,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(cosmos_client) -> def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat( - cosmos_client + cosmos_client, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( diff --git a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py index d0285b6e42298..740a55089779e 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py +++ b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py @@ -110,7 +110,9 @@ def test_from_texts_cosine_distance_delete_one( partition_key: Any, ) 
-> None: texts = [ - "foo", "bar", "baz", + "foo", + "bar", + "baz", ] metadatas = [{"a": 1}, {"b": 1}, {"d": 1, "e": 2}] From 9e7838ca4e97d8720c8739f3f2edc7649e6b46b7 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 17:45:31 -0700 Subject: [PATCH 32/44] linting --- .../cache/test_azure_cosmosdbnosql_cache.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index cf3e594e577b9..7cad8d3547fe5 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -56,7 +56,7 @@ def vector_embedding_policy(distance_function: str) -> dict: def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat( - cosmos_client, + cosmos_client: Any, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -83,7 +83,9 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat( get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(cosmos_client) -> None: +def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat( + cosmos_client: Any, +) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -110,7 +112,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(cosmos_client) -> None def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat( - cosmos_client, + cosmos_client: Any, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -139,7 +141,9 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat( get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(cosmos_client) -> None: +def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat( + cosmos_client: Any, 
+) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, @@ -168,7 +172,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(cosmos_client) -> def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat( - cosmos_client, + cosmos_client: Any, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -195,7 +199,9 @@ def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat( get_llm_cache().clear(llm_string=llm_string) -def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat(cosmos_client) -> None: +def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat( + cosmos_client: Any, +) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( cosmos_client=cosmos_client, From f4250acf5ebf0cdc9cdf8db9e89e034f1c1a931f Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 17:47:36 -0700 Subject: [PATCH 33/44] linting --- .../cache/test_azure_cosmosdbnosql_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py index 7cad8d3547fe5..343eb11d7c242 100644 --- a/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py +++ b/libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py @@ -84,7 +84,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat( def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat( - cosmos_client: Any, + cosmos_client: Any, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -142,7 +142,7 @@ def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat( def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat( - cosmos_client: Any, + cosmos_client: Any, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( @@ -200,7 +200,7 @@ def 
test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat( def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat( - cosmos_client: Any, + cosmos_client: Any, ) -> None: set_llm_cache( AzureCosmosDBNoSqlSemanticCache( From c13660c3a9a8c2217d04131b12ddd7b56a7dcb83 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 17:50:55 -0700 Subject: [PATCH 34/44] linting --- docs/docs/integrations/llm_caching.ipynb | 36 +++++++++++++----------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index 5a1ce0973ef3d..dcb4d6ae9a29c 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -1855,20 +1855,21 @@ ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "235ff73bf7143f13", + "metadata": {}, "source": [ "## Azure CosmosDB NoSql Semantic Cache\n", "\n", "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching." 
- ], - "id": "235ff73bf7143f13" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "41fea5aa7b2153ca", + "metadata": {}, + "outputs": [], "source": [ "from typing import Any, Dict\n", "from azure.cosmos import CosmosClient, PartitionKey\n", @@ -1881,6 +1882,7 @@ "\n", "cosmos_client = CosmosClient(HOST, KEY)\n", "\n", + "\n", "def get_vector_indexing_policy() -> dict:\n", " return {\n", " \"indexingMode\": \"consistent\",\n", @@ -1902,10 +1904,11 @@ " ]\n", " }\n", "\n", + "\n", "cosmos_container_properties_test = {\"partition_key\": PartitionKey(path=\"/id\")}\n", "cosmos_database_properties_test: Dict[str, Any] = {}\n", - " \n", - "set_llm_cache( \n", + "\n", + "set_llm_cache(\n", " AzureCosmosDBNoSqlSemanticCache(\n", " cosmos_client=cosmos_client,\n", " embedding=OpenAIEmbeddings(),\n", @@ -1915,32 +1918,31 @@ " cosmos_database_properties=cosmos_database_properties_test,\n", " )\n", ")" - ], - "id": "41fea5aa7b2153ca" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "1e1cd93819921bf6", + "metadata": {}, + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", "llm.invoke(\"Tell me a joke\")" - ], - "id": "1e1cd93819921bf6" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "576ce24c1244812a", + "metadata": {}, + "outputs": [], "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", "llm.invoke(\"Tell me a joke\")" - ], - "id": "576ce24c1244812a" + ] }, { "cell_type": "markdown", From 3f231356f5b809913e22269ea15e495541a8e2c2 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Sep 2024 17:54:37 -0700 Subject: [PATCH 35/44] linting --- docs/docs/integrations/llm_caching.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/integrations/llm_caching.ipynb 
b/docs/docs/integrations/llm_caching.ipynb index dcb4d6ae9a29c..a0df6dc326140 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -1872,11 +1872,11 @@ "outputs": [], "source": [ "from typing import Any, Dict\n", + "\n", "from azure.cosmos import CosmosClient, PartitionKey\n", "from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", "from langchain_openai import OpenAIEmbeddings\n", "\n", - "\n", "HOST = \"COSMOS_DB_URI\"\n", "KEY = \"COSMOS_DB_KEY\"\n", "\n", From 7fc0f593f0f409fb995348d1604a68912234c918 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Thu, 26 Sep 2024 09:53:43 -0700 Subject: [PATCH 36/44] Adding support for managed identity for cosmosdb nosql VS --- .../vectorstores/azure_cosmos_db_no_sql.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 010c92d8df59b..a2d8035237578 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -13,6 +13,7 @@ if TYPE_CHECKING: from azure.cosmos import ContainerProxy, CosmosClient + from azure.identity import ClientSecretCredential class AzureCosmosDBNoSqlVectorSearch(VectorStore): @@ -238,6 +239,44 @@ def from_texts( ) return vectorstore + @classmethod + def from_connection_string_and_aad( + cls, + connection_string: str, + clientSecretCredential: ClientSecretCredential, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + ** kwargs: Any, + ) -> AzureCosmosDBNoSqlVectorSearch: + cosmos_client = CosmosClient(connection_string, clientSecretCredential) + kwargs["cosmos_client"] = cosmos_client + vectorstore = cls._from_kwargs(embedding, **kwargs) + vectorstore.add_texts( + texts=texts, + metadatas=metadatas, + ) + return 
vectorstore + + @classmethod + def from_connection_string_and_key( + cls, + connection_string: str, + key: str, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> AzureCosmosDBNoSqlVectorSearch: + cosmos_client = CosmosClient(connection_string, key) + kwargs["cosmos_client"] = cosmos_client + vectorstore = cls._from_kwargs(embedding, **kwargs) + vectorstore.add_texts( + texts=texts, + metadatas=metadatas, + ) + return vectorstore + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: if ids is None: raise ValueError("No document ids provided to delete.") From 224393c5de49aa85ae235132245f84c298eeee1a Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Thu, 26 Sep 2024 10:00:20 -0700 Subject: [PATCH 37/44] linting --- .../vectorstores/azure_cosmos_db_no_sql.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index a2d8035237578..99ee349389cbd 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -247,7 +247,7 @@ def from_connection_string_and_aad( texts: List[str], embedding: Embeddings, metadatas: Optional[List[dict]] = None, - ** kwargs: Any, + **kwargs: Any, ) -> AzureCosmosDBNoSqlVectorSearch: cosmos_client = CosmosClient(connection_string, clientSecretCredential) kwargs["cosmos_client"] = cosmos_client @@ -260,13 +260,13 @@ def from_connection_string_and_aad( @classmethod def from_connection_string_and_key( - cls, - connection_string: str, - key: str, - texts: List[str], - embedding: Embeddings, - metadatas: Optional[List[dict]] = None, - **kwargs: Any, + cls, + connection_string: str, + key: str, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = 
None, + **kwargs: Any, ) -> AzureCosmosDBNoSqlVectorSearch: cosmos_client = CosmosClient(connection_string, key) kwargs["cosmos_client"] = cosmos_client From cbd6f2b90258a8288683afe4654fa44d546a908b Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Thu, 10 Oct 2024 12:24:51 -0700 Subject: [PATCH 38/44] Adding user agent for vector store --- .../vectorstores/azure_cosmos_db_no_sql.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 99ee349389cbd..646b60faeea72 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -15,6 +15,8 @@ from azure.cosmos import ContainerProxy, CosmosClient from azure.identity import ClientSecretCredential +USER_AGENT = ("LlamaIndex-CDBNoSql-VectorStore-Python",) + class AzureCosmosDBNoSqlVectorSearch(VectorStore): """`Azure Cosmos DB for NoSQL` vector store. 
@@ -249,7 +251,9 @@ def from_connection_string_and_aad( metadatas: Optional[List[dict]] = None, **kwargs: Any, ) -> AzureCosmosDBNoSqlVectorSearch: - cosmos_client = CosmosClient(connection_string, clientSecretCredential) + cosmos_client = CosmosClient( + connection_string, clientSecretCredential, user_agent=USER_AGENT + ) kwargs["cosmos_client"] = cosmos_client vectorstore = cls._from_kwargs(embedding, **kwargs) vectorstore.add_texts( @@ -268,7 +272,7 @@ def from_connection_string_and_key( metadatas: Optional[List[dict]] = None, **kwargs: Any, ) -> AzureCosmosDBNoSqlVectorSearch: - cosmos_client = CosmosClient(connection_string, key) + cosmos_client = CosmosClient(connection_string, key, user_agent=USER_AGENT) kwargs["cosmos_client"] = cosmos_client vectorstore = cls._from_kwargs(embedding, **kwargs) vectorstore.add_texts( From 9c3bdcbb6c8c19e80db600a997f143bf62d74877 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 1 Nov 2024 05:54:51 -0700 Subject: [PATCH 39/44] Resolving comments --- libs/community/langchain_community/cache.py | 2 +- .../vectorstores/azure_cosmos_db_no_sql.py | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 17dee63b29477..8c9c16668e81f 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -2106,7 +2106,7 @@ def __init__( ef_construction: int = 64, ef_search: int = 40, score_threshold: Optional[float] = None, - application_name: str = "LangChain-CDBNoSQL-SemanticCache-Python", + application_name: str = "LangChain-CDBMongoVCore-SemanticCache-Python", ): """ Args: diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 646b60faeea72..ed8e65cf88234 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ 
b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -15,7 +15,7 @@ from azure.cosmos import ContainerProxy, CosmosClient from azure.identity import ClientSecretCredential -USER_AGENT = ("LlamaIndex-CDBNoSql-VectorStore-Python",) +USER_AGENT = ("LangChain-CDBNoSql-VectorStore-Python",) class AzureCosmosDBNoSqlVectorSearch(VectorStore): @@ -121,6 +121,10 @@ def __init__( vector_embedding_policy=self._vector_embedding_policy, ) + self._embedding_key = self._vector_embedding_policy["vectorEmbeddings"][0][ + "path" + ][1:] + def add_texts( self, texts: Iterable[str], @@ -164,7 +168,7 @@ def _insert_texts( { "id": str(uuid.uuid4()), text_key: t, - "embedding": embedding, + self._embedding_key: embedding, "metadata": m, } for t, m, embedding in zip(texts, metadatas, embeddings) @@ -313,21 +317,22 @@ def _similarity_search_with_score( query += "TOP @limit " query += ( - "c.id, c.text, c.metadata, c.embedding, " - "VectorDistance(c.embedding, @embeddings) AS SimilarityScore FROM c" + "c.id, c[@embeddingKey], c.text, c.metadata, " + "VectorDistance(c[@embeddingKey], @embeddings) AS SimilarityScore FROM c" ) # Add where_clause if specified if pre_filter is not None and pre_filter.get("where_clause") is not None: query += " {}".format(pre_filter["where_clause"]) - query += " ORDER BY VectorDistance(c.embedding, @embeddings)" + query += " ORDER BY VectorDistance(c[@embeddingKey], @embeddings)" # Add limit_offset_clause if specified if pre_filter is not None and pre_filter.get("limit_offset_clause") is not None: query += " {}".format(pre_filter["limit_offset_clause"]) parameters = [ {"name": "@limit", "value": k}, + {"name": "@embeddingKey", "value": self._embedding_key}, {"name": "@embeddings", "value": embeddings}, ] @@ -346,7 +351,7 @@ def _similarity_search_with_score( metadata["id"] = item["id"] score = item["SimilarityScore"] if with_embedding: - metadata["embedding"] = item["embedding"] + metadata[self._embedding_key] = item[self._embedding_key] 
docs_and_scores.append( (Document(page_content=text, metadata=metadata), score) ) @@ -410,7 +415,7 @@ def max_marginal_relevance_search_by_vector( # Re-ranks the docs using MMR mmr_doc_indexes = maximal_marginal_relevance( np.array(embedding), - [doc.metadata["embedding"] for doc, _ in docs], + [doc.metadata[self._embedding_key] for doc, _ in docs], k=k, lambda_mult=lambda_mult, ) From 1c1a1e8935a5e003a88d2710e4a92f74cc7a0b75 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 1 Nov 2024 05:56:52 -0700 Subject: [PATCH 40/44] Fixing merge conflicts --- .../vectorstores/azure_cosmos_db_no_sql.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index ed8e65cf88234..7baa51a17a1e0 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -122,8 +122,8 @@ def __init__( ) self._embedding_key = self._vector_embedding_policy["vectorEmbeddings"][0][ - "path" - ][1:] + "path" + ][1:] def add_texts( self, @@ -317,15 +317,15 @@ def _similarity_search_with_score( query += "TOP @limit " query += ( - "c.id, c[@embeddingKey], c.text, c.metadata, " - "VectorDistance(c[@embeddingKey], @embeddings) AS SimilarityScore FROM c" + "c.id, c.{}, c.text, c.metadata, " + "VectorDistance(c.@embeddingKey, @embeddings) AS SimilarityScore FROM c" ) # Add where_clause if specified if pre_filter is not None and pre_filter.get("where_clause") is not None: query += " {}".format(pre_filter["where_clause"]) - query += " ORDER BY VectorDistance(c[@embeddingKey], @embeddings)" + query += " ORDER BY VectorDistance(c.@embeddingKey, @embeddings)" # Add limit_offset_clause if specified if pre_filter is not None and pre_filter.get("limit_offset_clause") is not None: From a4b7d1a5c6b7d009ebfb332ac8d03bca7a10d5f6 Mon Sep 
17 00:00:00 2001 From: Aayush Kataria Date: Thu, 5 Dec 2024 16:59:22 -0800 Subject: [PATCH 41/44] Sample --- docs/docs/integrations/llm_caching.ipynb | 120 ++++++++++++++---- .../vectorstores/azure_cosmos_db_no_sql.py | 2 +- 2 files changed, 96 insertions(+), 26 deletions(-) diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index 9c7ac8673f99f..5b4fa5cfccde1 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -14,10 +14,13 @@ }, { "cell_type": "code", - "execution_count": 2, "id": "88486f6f", - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-06T00:53:54.575978Z", + "start_time": "2024-12-06T00:53:38.515785Z" + } + }, "source": [ "%pip install -qU langchain-openai langchain-community\n", "\n", @@ -26,19 +29,30 @@ "\n", "if \"OPENAI_API_KEY\" not in os.environ:\n", " os.environ[\"OPENAI_API_KEY\"] = getpass()" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", + "\u001B[0m\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", + "\u001B[0m\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", + "\u001B[0mNote: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "execution_count": 1 }, { "cell_type": "code", - "execution_count": 3, "id": "10ad9224", "metadata": { "ExecuteTime": { - "end_time": "2024-04-12T02:05:57.319706Z", - "start_time": "2024-04-12T02:05:57.303868Z" + "end_time": "2024-12-06T00:54:06.474593Z", + "start_time": "2024-12-06T00:53:58.727138Z" } }, - "outputs": [], "source": [ "from langchain.globals import set_llm_cache\n", 
"from langchain_openai import OpenAI\n", @@ -46,7 +60,9 @@ "# To make the caching really obvious, lets use a slower and older model.\n", "# Caching supports newer chat models as well.\n", "llm = OpenAI(model=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)" - ] + ], + "outputs": [], + "execution_count": 2 }, { "cell_type": "markdown", @@ -1866,15 +1882,19 @@ }, { "cell_type": "code", - "execution_count": null, "id": "41fea5aa7b2153ca", - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-06T00:55:38.648972Z", + "start_time": "2024-12-06T00:55:38.290541Z" + } + }, "source": [ + "from libs.community.langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", "from typing import Any, Dict\n", "\n", "from azure.cosmos import CosmosClient, PartitionKey\n", - "from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", + "# from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", "from langchain_openai import OpenAIEmbeddings\n", "\n", "HOST = \"COSMOS_DB_URI\"\n", @@ -1888,7 +1908,7 @@ " \"indexingMode\": \"consistent\",\n", " \"includedPaths\": [{\"path\": \"/*\"}],\n", " \"excludedPaths\": [{\"path\": '/\"_etag\"/?'}],\n", - " \"vectorIndexes\": [{\"path\": \"/embedding\", \"type\": \"quantized_flat\"}],\n", + " \"vectorIndexes\": [{\"path\": \"/embedding\", \"type\": \"diskANN\"}],\n", " }\n", "\n", "\n", @@ -1912,37 +1932,87 @@ " AzureCosmosDBNoSqlSemanticCache(\n", " cosmos_client=cosmos_client,\n", " embedding=OpenAIEmbeddings(),\n", - " vector_embedding_policy=get_vector_indexing_policy(),\n", - " indexing_policy=get_vector_embedding_policy(),\n", + " vector_embedding_policy=get_vector_embedding_policy(),\n", + " indexing_policy=get_vector_indexing_policy(),\n", " cosmos_container_properties=cosmos_container_properties_test,\n", " cosmos_database_properties=cosmos_database_properties_test,\n", " )\n", ")" - ] + ], + "outputs": [], + "execution_count": 5 }, { "cell_type": "code", - 
"execution_count": null, "id": "1e1cd93819921bf6", - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-06T00:55:44.513080Z", + "start_time": "2024-12-06T00:55:41.353843Z" + } + }, "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", "llm.invoke(\"Tell me a joke\")" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 374 ms, sys: 34.2 ms, total: 408 ms\n", + "Wall time: 3.15 s\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\"" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 }, { "cell_type": "code", - "execution_count": null, "id": "576ce24c1244812a", - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-06T00:55:50.925865Z", + "start_time": "2024-12-06T00:55:50.548520Z" + } + }, "source": [ "%%time\n", "# The first time, it is not yet in cache, so it should take longer\n", "llm.invoke(\"Tell me a joke\")" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 17.7 ms, sys: 2.88 ms, total: 20.6 ms\n", + "Wall time: 373 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy couldn't the bicycle stand up by itself? 
Because it was two-tired!\"" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 8 }, { "cell_type": "markdown", diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 2ad13c5b90757..7fa6d4e1f0a3f 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -347,7 +347,7 @@ def _similarity_search_with_score( ) for item in items: text = item["text"] - metadata = item["metadata"] + metadata = item.pop("metadata", {}) metadata["id"] = item["id"] score = item["SimilarityScore"] if with_embedding: From d237cd57a5e77d9fc966d452713dc240167ef835 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Thu, 5 Dec 2024 17:03:18 -0800 Subject: [PATCH 42/44] linting --- docs/docs/integrations/llm_caching.ipynb | 68 ++++++++++++------------ 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index 5b4fa5cfccde1..1987cef11b02b 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -14,6 +14,7 @@ }, { "cell_type": "code", + "execution_count": 1, "id": "88486f6f", "metadata": { "ExecuteTime": { @@ -21,31 +22,31 @@ "start_time": "2024-12-06T00:53:38.515785Z" } }, - "source": [ - "%pip install -qU langchain-openai langchain-community\n", - "\n", - "import os\n", - "from getpass import getpass\n", - "\n", - "if \"OPENAI_API_KEY\" not in os.environ:\n", - " os.environ[\"OPENAI_API_KEY\"] = getpass()" - ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", - "\u001B[0m\u001B[33mWARNING: Ignoring invalid 
distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", - "\u001B[0m\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", - "\u001B[0mNote: you may need to restart the kernel to use updated packages.\n" + "\u001b[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001b[0m\u001b[33m\r\n", + "\u001b[0m\u001b[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001b[0m\u001b[33m\r\n", + "\u001b[0m\u001b[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001b[0m\u001b[33m\r\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" ] } ], - "execution_count": 1 + "source": [ + "%pip install -qU langchain-openai langchain-community\n", + "\n", + "import os\n", + "from getpass import getpass\n", + "\n", + "if \"OPENAI_API_KEY\" not in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass()" + ] }, { "cell_type": "code", + "execution_count": 2, "id": "10ad9224", "metadata": { "ExecuteTime": { @@ -53,6 +54,7 @@ "start_time": "2024-12-06T00:53:58.727138Z" } }, + "outputs": [], "source": [ "from langchain.globals import set_llm_cache\n", "from langchain_openai import OpenAI\n", @@ -60,9 +62,7 @@ "# To make the caching really obvious, lets use a slower and older model.\n", "# Caching supports newer chat models as well.\n", "llm = OpenAI(model=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)" - ], - "outputs": [], - "execution_count": 2 + ] }, { "cell_type": "markdown", @@ -1882,6 +1882,7 @@ }, { "cell_type": "code", + "execution_count": 5, "id": "41fea5aa7b2153ca", "metadata": { "ExecuteTime": { @@ -1889,14 +1890,17 @@ "start_time": "2024-12-06T00:55:38.290541Z" } }, + "outputs": [], "source": [ - "from 
libs.community.langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", "from typing import Any, Dict\n", "\n", "from azure.cosmos import CosmosClient, PartitionKey\n", + "\n", "# from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", "from langchain_openai import OpenAIEmbeddings\n", "\n", + "from libs.community.langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", + "\n", "HOST = \"COSMOS_DB_URI\"\n", "KEY = \"COSMOS_DB_KEY\"\n", "\n", @@ -1938,12 +1942,11 @@ " cosmos_database_properties=cosmos_database_properties_test,\n", " )\n", ")" - ], - "outputs": [], - "execution_count": 5 + ] }, { "cell_type": "code", + "execution_count": 6, "id": "1e1cd93819921bf6", "metadata": { "ExecuteTime": { @@ -1951,11 +1954,6 @@ "start_time": "2024-12-06T00:55:41.353843Z" } }, - "source": [ - "%%time\n", - "# The first time, it is not yet in cache, so it should take longer\n", - "llm.invoke(\"Tell me a joke\")" - ], "outputs": [ { "name": "stdout", @@ -1976,10 +1974,15 @@ "output_type": "execute_result" } ], - "execution_count": 6 + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm.invoke(\"Tell me a joke\")" + ] }, { "cell_type": "code", + "execution_count": 8, "id": "576ce24c1244812a", "metadata": { "ExecuteTime": { @@ -1987,11 +1990,6 @@ "start_time": "2024-12-06T00:55:50.548520Z" } }, - "source": [ - "%%time\n", - "# The first time, it is not yet in cache, so it should take longer\n", - "llm.invoke(\"Tell me a joke\")" - ], "outputs": [ { "name": "stdout", @@ -2012,7 +2010,11 @@ "output_type": "execute_result" } ], - "execution_count": 8 + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm.invoke(\"Tell me a joke\")" + ] }, { "cell_type": "markdown", From a7a4544bc9de9ce1f64da1dfc5e63a4c0c04efd0 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Mon, 16 Dec 2024 09:23:52 -0800 Subject: [PATCH 43/44] Fixing merge 
conflicts --- docs/docs/integrations/llm_caching.ipynb | 8 ++++---- libs/community/langchain_community/cache.py | 3 +++ .../vectorstores/azure_cosmos_db_no_sql.py | 5 ++++- .../vectorstores/test_azure_cosmos_db_no_sql.py | 11 +++++++---- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index 1987cef11b02b..3c88ae1fd5af7 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -27,10 +27,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001b[0m\u001b[33m\r\n", - "\u001b[0m\u001b[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001b[0m\u001b[33m\r\n", - "\u001b[0m\u001b[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001b[0m\u001b[33m\r\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + "\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", + "\u001B[0m\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", + "\u001B[0m\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", + "\u001B[0mNote: you may need to restart the kernel to use updated packages.\n" ] } ], diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py index 0c2fa35a88a22..11a704224e506 100644 --- a/libs/community/langchain_community/cache.py +++ b/libs/community/langchain_community/cache.py @@ -2293,6 +2293,7 @@ def __init__( indexing_policy: 
Dict[str, Any], cosmos_container_properties: Dict[str, Any], cosmos_database_properties: Dict[str, Any], + create_container: bool = True, ): self.cosmos_client = cosmos_client self.database_name = database_name @@ -2302,6 +2303,7 @@ def __init__( self.indexing_policy = indexing_policy self.cosmos_container_properties = cosmos_container_properties self.cosmos_database_properties = cosmos_database_properties + self.create_container = create_container self._cache_dict: Dict[str, AzureCosmosDBNoSqlVectorSearch] = {} def _cache_name(self, llm_string: str) -> str: @@ -2326,6 +2328,7 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch: cosmos_database_properties=self.cosmos_database_properties, database_name=self.database_name, container_name=self.container_name, + create_container=self.create_container, ) return self._cache_dict[cache_name] diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py index 48110a182a982..8d0d90dd92e9c 100644 --- a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -14,7 +14,7 @@ from langchain_community.vectorstores.utils import maximal_marginal_relevance if TYPE_CHECKING: - from azure.cosmos import CosmosClient + from azure.cosmos import ContainerProxy, CosmosClient from azure.identity import DefaultAzureCredential USER_AGENT = ("LangChain-CDBNoSql-VectorStore-Python",) @@ -859,3 +859,6 @@ def _where_clause_operator_map(self) -> Dict[str, str]: "$full_text_contains_any": "FullTextContainsAny", } return operator_map + + def get_container(self) -> ContainerProxy: + return self._container diff --git a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py index d56979d0b9197..bbaca0775be7c 100644 
--- a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py +++ b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py @@ -8,13 +8,13 @@ import pytest from langchain_core.documents import Document +from langchain_community.embeddings import OpenAIEmbeddings from langchain_community.vectorstores.azure_cosmos_db_no_sql import ( AzureCosmosDBNoSqlVectorSearch, Condition, CosmosDBQueryType, PreFilter, ) -from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings logging.basicConfig(level=logging.DEBUG) @@ -86,13 +86,14 @@ def test_from_documents_cosine_distance( self, cosmos_client: Any, partition_key: Any, + azure_openai_embeddings: OpenAIEmbeddings, ) -> None: """Test end to end construction and search.""" documents = self._get_documents() store = AzureCosmosDBNoSqlVectorSearch.from_documents( documents, - FakeEmbeddings(), + embedding=azure_openai_embeddings, cosmos_client=cosmos_client, database_name=database_name, container_name=container_name, @@ -118,12 +119,13 @@ def test_from_texts_cosine_distance_delete_one( self, cosmos_client: Any, partition_key: Any, + azure_openai_embeddings: OpenAIEmbeddings, ) -> None: texts, metadatas = self._get_texts_and_metadata() store = AzureCosmosDBNoSqlVectorSearch.from_texts( texts, - FakeEmbeddings(), + azure_openai_embeddings, metadatas, cosmos_client=cosmos_client, database_name=database_name, @@ -158,13 +160,14 @@ def test_from_documents_cosine_distance_with_filtering( self, cosmos_client: Any, partition_key: Any, + azure_openai_embeddings: OpenAIEmbeddings, ) -> None: """Test end to end construction and search.""" documents = self._get_documents() store = AzureCosmosDBNoSqlVectorSearch.from_documents( documents, - FakeEmbeddings(), + embedding=azure_openai_embeddings, cosmos_client=cosmos_client, database_name=database_name, container_name=container_name, From 29044fcc8f2143043171bf2257413d48f209a78a Mon Sep 17 00:00:00 2001 From: Chester Curme 
Date: Mon, 16 Dec 2024 21:22:18 -0500 Subject: [PATCH 44/44] fix doc --- docs/docs/integrations/llm_caching.ipynb | 39 ++++++------------------ 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index 3c88ae1fd5af7..5356f90a7537c 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -14,26 +14,10 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "88486f6f", - "metadata": { - "ExecuteTime": { - "end_time": "2024-12-06T00:53:54.575978Z", - "start_time": "2024-12-06T00:53:38.515785Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", - "\u001B[0m\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", - "\u001B[0m\u001B[33mWARNING: Ignoring invalid distribution ~angchain (/Users/aayushkataria/anaconda3/lib/python3.11/site-packages)\u001B[0m\u001B[33m\r\n", - "\u001B[0mNote: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "execution_count": null, + "id": "f938e881", + "metadata": {}, + "outputs": [], "source": [ "%pip install -qU langchain-openai langchain-community\n", "\n", @@ -1836,7 +1820,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "id": "bc1570a2a77b58c8", "metadata": { "ExecuteTime": { @@ -1866,7 +1850,7 @@ ], "source": [ "%%time\n", - "# The first time, it is not yet in cache, so it should take longer\n", + "# The second time it is, so it goes faster\n", "llm.invoke(\"Tell me a joke\")" ] }, @@ -1882,7 +1866,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "41fea5aa7b2153ca", "metadata": { "ExecuteTime": { @@ -1895,12 +1879,9 @@ 
"from typing import Any, Dict\n", "\n", "from azure.cosmos import CosmosClient, PartitionKey\n", - "\n", - "# from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", + "from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", "from langchain_openai import OpenAIEmbeddings\n", "\n", - "from libs.community.langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n", - "\n", "HOST = \"COSMOS_DB_URI\"\n", "KEY = \"COSMOS_DB_KEY\"\n", "\n", @@ -1982,7 +1963,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "576ce24c1244812a", "metadata": { "ExecuteTime": { @@ -2012,7 +1993,7 @@ ], "source": [ "%%time\n", - "# The first time, it is not yet in cache, so it should take longer\n", + "# The second time it is, so it goes faster\n", "llm.invoke(\"Tell me a joke\")" ] },