Skip to content

Commit

Permalink
fix more nits in mongodb notebook (#77)
Browse files Browse the repository at this point in the history
  • Loading branch information
benjibc authored Mar 1, 2024
1 parent 005b51a commit 58552fc
Showing 1 changed file with 37 additions and 24 deletions.
61 changes: 37 additions & 24 deletions examples/rag/mongo_basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -96,13 +96,13 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"def generate_embeddings(input_texts: List[str], model_api_string: str, prefix=\"\") -> List[List[float]]:\n",
"def generate_embeddings(input_texts: str, model_api_string: str, prefix=\"\") -> List[float]:\n",
" \"\"\"Generate embeddings from Fireworks python library\n",
"\n",
" Args:\n",
Expand All @@ -115,11 +115,17 @@
" \"\"\"\n",
" if prefix:\n",
" input_texts = [prefix + text for text in input_texts] \n",
" return [x.embedding for x in \n",
" fw_client.embeddings.create(\n",
" return fw_client.embeddings.create(\n",
" input=input_texts,\n",
" model=model_api_string,\n",
" ).data]"
" ).data[0].embedding"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The function above does not implement batching: it always returns only the embedding at position zero of the response. We will cover how to do batching in the next tutorial."
]
},
{
Expand All @@ -132,7 +138,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 30,
"metadata": {},
"outputs": [
{
Expand All @@ -145,18 +151,26 @@
],
"source": [
"embedding_model_string = 'nomic-ai/nomic-embed-text-v1.5'\n",
"vector_database_field_name = 'embeddings' # define your embedding field name.\n",
"vector_database_field_name = 'embed' # define your embedding field name.\n",
"NUM_DOC_LIMIT = 2000 # the number of documents you will process and generate embeddings.\n",
"\n",
"sample_output = generate_embeddings([\"This is a test.\"], embedding_model_string)\n",
"print(f\"Embedding size is: {str(len(sample_output[0]))}\")\n"
"print(f\"Embedding size is: {str(len(sample_output))}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 31,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Document Processing : 2000it [01:56, 17.22it/s]\n"
]
}
],
"source": [
"from tqdm import tqdm\n",
"from datetime import datetime\n",
Expand All @@ -173,7 +187,7 @@
").limit(NUM_DOC_LIMIT), desc=\"Document Processing \"):\n",
" extracted_str = \"\\n\".join([k + \": \" + str(doc[k]) for k in keys_to_extract if k in doc])\n",
" if vector_database_field_name not in doc:\n",
" doc[vector_database_field_name] = generate_embeddings([extracted_str], embedding_model_string, \"search_document: \")[0]\n",
" doc[vector_database_field_name] = generate_embeddings([extracted_str], embedding_model_string, \"search_document: \")\n",
" collection.replace_one({'_id': doc['_id']}, doc)\n"
]
},
Expand Down Expand Up @@ -207,7 +221,7 @@
" \"fields\": [\n",
" {\n",
" \"type\": \"vector\",\n",
" \"path\": \"embeddings\",\n",
" \"path\": \"embed\",\n",
" \"numDimensions\": 768,\n",
" \"similarity\": \"dotProduct\"\n",
" }\n",
Expand All @@ -227,14 +241,13 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"show updated input texts ['search_query: I like Christmas movies, any recommendations?']\n",
"From your query \"I like Christmas movies, any recommendations?\", the following movie listings were found:\n",
"\n",
"1. Surviving Christmas\n",
Expand All @@ -254,7 +267,7 @@
"# Example query.\n",
"query = \"I like Christmas movies, any recommendations?\"\n",
"prefix=\"search_query: \"\n",
"query_emb = generate_embeddings([query], embedding_model_string, prefix=prefix)[0]\n",
"query_emb = generate_embeddings([query], embedding_model_string, prefix=prefix)\n",
"\n",
"results = collection.aggregate([\n",
" {\n",
Expand Down Expand Up @@ -284,7 +297,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -311,19 +324,19 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Based on the user's query, I would recommend the following Christmas movies from the provided data:\n",
"Based on the user's query, I would recommend the following Christmas movies from the given data:\n",
"\n",
"1. \"Love Actually\" - A romantic comedy that takes place in the five weeks preceding Christmas, following the lives of eight couples in dealing with their love lives in various interrelated tales all set in London, England.\n",
"2. \"How the Grinch Stole Christmas\" - A live-action adaptation of Dr. Seuss's classic holiday tale about a green, revenge-seeking Grinch who decides to ruin Christmas for the cheery residents of Whoville.\n",
"3. \"Surviving Christmas\" - A comedy about a wealthy Chicago advertisement executive who, after being left by his girlfriend right before Christmas, hires a family to spend the holiday with him in his childhood home.\n",
"4. \"Christmas Carol: The Movie\" - An animated retelling of Charles Dickens' classic story, where Ebenezer Scrooge learns\n"
"1. \"Love Actually\" (2003) - A romantic comedy that follows the lives of eight couples in London during the Christmas season, dealing with various aspects of love and relationships.\n",
"2. \"The Grinch\" (2000) - A family-friendly animated film about the Grinch, a creature who despises Christmas and sets out to steal it from the residents of Whoville, but is eventually won over by the spirit of the holiday.\n",
"3. \"Surviving Christmas\" (2004) - A comedy about a wealthy man who hires a family to spend Christmas with him in his childhood home, leading to unexpected consequences and a journey of self-discovery.\n",
"4. \"Christmas Carol: The Movie\" (2001) - An animated retelling of Charles Dickens' classic story of E\n"
]
}
],
Expand Down

0 comments on commit 58552fc

Please sign in to comment.