Skip to content

Commit

Permalink
update rag_notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
qingzhong1 committed Jan 2, 2024
1 parent 2d42ec2 commit b726db7
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain"
"!pip install langchain"
]
},
{
Expand Down Expand Up @@ -93,16 +93,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"--2024-01-02 02:23:31-- https://paddlenlp.bj.bcebos.com/datasets/examples/construction_regulations.tar\n",
"--2024-01-02 02:52:27-- https://paddlenlp.bj.bcebos.com/datasets/examples/construction_regulations.tar\n",
"Resolving paddlenlp.bj.bcebos.com (paddlenlp.bj.bcebos.com)... 36.110.192.178, 2409:8c04:1001:1002:0:ff:b001:368a\n",
"Connecting to paddlenlp.bj.bcebos.com (paddlenlp.bj.bcebos.com)|36.110.192.178|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 1239040 (1.2M) [application/x-tar]\n",
"Saving to: ‘construction_regulations.tar’\n",
"\n",
"construction_regula 100%[===================>] 1.18M 4.92MB/s in 0.2s \n",
"construction_regula 100%[===================>] 1.18M 874KB/s in 1.4s \n",
"\n",
"2024-01-02 02:23:32 (4.92 MB/s) - ‘construction_regulations.tar’ saved [1239040/1239040]\n",
"2024-01-02 02:52:28 (874 KB/s) - ‘construction_regulations.tar’ saved [1239040/1239040]\n",
"\n",
"construction_regulations/\n",
"construction_regulations/城市管理执法办法.pdf\n",
Expand Down Expand Up @@ -159,17 +159,18 @@
"outputs": [],
"source": [
"class FaissSearch:\n",
" def __init__(self, db):\n",
" # 类的初始化方法,接收一个数据库实例并将其存储在类的实例变量 self.db 中\n",
" def __init__(self, db, embeddings):\n",
" # 类的初始化方法,接收一个数据库实例并将其存储在类的实例变量 self.db 中,接收一个embeddings方法传到self.embeddings中\n",
" self.db = db\n",
" self.embeddings = embeddings\n",
"\n",
" def search(self, query: str, top_k: int = 10, **kwargs):\n",
" # 定义一个搜索方法,接受一个查询字符串 'query' 和一个整数 'top_k',默认为 10\n",
" docs = self.db.similarity_search(query, top_k)\n",
" # 调用数据库的 similarity_search 方法来获取与查询最相关的文档\n",
" para_result = embeddings.embed_documents([i.page_content for i in docs])\n",
" para_result = self.embeddings.embed_documents([i.page_content for i in docs])\n",
" # 对获取的文档内容进行嵌入(embedding),以便进行相似性比较\n",
" query_result = embeddings.embed_query(query)\n",
" query_result = self.embeddings.embed_query(query)\n",
" # 对查询字符串也进行嵌入\n",
" similarities = cosine_similarity([query_result], para_result).reshape((-1,))\n",
" # 计算查询嵌入和文档嵌入之间的余弦相似度\n",
Expand Down Expand Up @@ -216,15 +217,15 @@
"output_type": "stream",
"text": [
"Created a chunk of size 408, which is longer than the specified 320\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe094cd3b80 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe094cd2a40 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe094cd3fa0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe0884f9ba0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe0884f9690 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe0884f95d0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe0884f8fa0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe0884f8220 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe0884f92d0 state=finished raised RateLimitError>\n"
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3f24ddfb20 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3f24ddf490 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3f24ddce80 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3f24ddf460 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3edc1946a0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3edc195ab0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3edc194e80 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3edc1946a0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3edc194ca0 state=finished raised RateLimitError>\n"
]
}
],
Expand Down Expand Up @@ -261,8 +262,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe08fd450f0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7fe08fd44b20 state=finished raised RateLimitError>\n"
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3f1fd290c0 state=finished raised RateLimitError>\n",
"Retrying requests: Attempt 1 ended with: <Future at 0x7f3f1fd28310 state=finished raised RateLimitError>\n"
]
},
{
Expand Down Expand Up @@ -362,7 +363,7 @@
}
],
"source": [
"faiss_search = FaissSearch(db=db)\n",
"faiss_search = FaissSearch(db=db, embeddings=embeddings)\n",
"res = faiss_search.search(query=\"城市管理执法主管部门的职责是什么?\")\n",
"from pprint import pprint\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install llama-index"
"!pip install llama-index"
]
},
{
Expand Down

0 comments on commit b726db7

Please sign in to comment.