Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added demo to talk to ACL with GPT #1556

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions pyserini/demo/aclchatgpt/chatbot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from dataclasses import dataclass
from argparse import ArgumentParser
from typing import Optional

import semantic_kernel as sk
from semantic_kernel.connectors.ai.open_ai import OpenAITextCompletion

from pyserini.demo.aclchatgpt.skill import PyseriniSkill, PyseriniConfig


@dataclass
class OpenAIConfig:
    """Credentials for the OpenAI text-completion service."""
    # OpenAI API key (loaded from the OPENAI_API_KEY entry in .env).
    api_key: str
    # OpenAI organization id (loaded from the OPENAI_ORG_ID entry in .env).
    org_id: str

class ChatBot:
    """Interactive console chatbot over ACL Anthology papers.

    Wires a Semantic Kernel OpenAI text-completion service together with the
    Pyserini search skill. Each user question is first rewritten into a
    self-contained ("absolute") question using the chat history, then answered
    with retrieved passages injected into the prompt.
    """

    # Answer prompt: the {{pyserini.search $input}} template call runs the
    # PyseriniSkill and splices the retrieved passages into the completion.
    acl_chat_prompt = """Given the query_results below, your task is to formulate an answer. You may choose to use or not to use the
information in the query_results.
If you use the query_results, you must reference the docid of the document used by appending to the answer with "(docid: doc-id-here)".
If you do not use the query_results, do not reference it in your answer.

===================
query_results: {{pyserini.search $input}}
===================

What is your response to {{$input}}?
"""

    # Rewriting prompt: turns a context-dependent follow-up question into one
    # that can be answered (and searched) on its own.
    absolute_question_prompt = """
Task: You are an AI language model tasked with transforming given questions into
absolute questions. An absolute question is a question that can stand on its own and carries all the context needed
to be answered. Here's an example:

User: Who is Alan Turing?
ChatBot: Who is Alan Turing?
User: How old is he?
ChatBot: How old is Alan Turing?

===================
History: {{$history}}
===================

Using the history as context, transform the following question into an absolute question: {{$input}}
"""

    def __init__(self, pyserini_config: PyseriniConfig, openai_config: OpenAIConfig):
        """Build the kernel, register the search skill, and compile prompts.

        :param pyserini_config: BM25 / hit-count settings for the retriever.
        :param openai_config: OpenAI API key and organization id.
        """
        self.kernel = sk.Kernel()
        self.kernel.add_text_completion_service(
            "dv", OpenAITextCompletion("text-davinci-003", openai_config.api_key, openai_config.org_id))
        self.kernel.import_skill(PyseriniSkill(pyserini_config), "pyserini")

        # Shared context: the skill reads "url"; "history" accumulates turns.
        self.context = self.kernel.create_new_context()
        self.context["url"] = "http://127.0.0.1:8080/search"
        self.context["history"] = ""

        # temperature=0 for deterministic answers; top_p tightens sampling.
        self.acl_chat_function = self.kernel.create_semantic_function(
            self.acl_chat_prompt, max_tokens=200, temperature=0, top_p=0.5)
        self.absolute_question_function = self.kernel.create_semantic_function(
            self.absolute_question_prompt, max_tokens=200, temperature=0, top_p=0.5)

    def _chat(self, input_text: str) -> None:
        """Process one user turn: rewrite, retrieve, answer, record history.

        :param input_text: the raw question typed by the user.
        """
        print("---------------------------------------------")
        # Rewrite the follow-up into a standalone question so retrieval works.
        absolute_question = self.absolute_question_function(input_text, context=self.context)
        print(f"Absolute Question: {absolute_question}")

        # Process the user message and get an answer
        answer = self.acl_chat_function(str(absolute_question), context=self.context)

        # Show the response
        print(f"ChatBot: {answer}")

        self.context["history"] += f"\nUser: {input_text}\nChatBot: {answer}\n"

    def chat(self) -> None:
        """Run the interactive read/answer loop until the user exits."""
        print("Hi, I'm the ACL ChatBot. Ask me a question about ACL Anthology papers and I'll do my best to answer it.")

        while True:
            print("=============================================")
            try:
                self._chat(input("User: "))
            except (EOFError, KeyboardInterrupt):
                # Exit cleanly on Ctrl-D / Ctrl-C instead of dumping a traceback.
                print("\nGoodbye!")
                break

def main():
    """Entry point: parse CLI options, load credentials, and start the chatbot."""
    arg_parser = ArgumentParser()
    arg_parser.add_argument('--k1', type=float, help='BM25 k1 parameter.')
    arg_parser.add_argument('--b', type=float, help='BM25 b parameter.')
    arg_parser.add_argument('--hits', type=int, default=10,
                            help='Number of hits returned by the retriever')
    parsed = arg_parser.parse_args()

    # Credentials come from the .env file (OPENAI_API_KEY / OPENAI_ORG_ID).
    api_key, org_id = sk.openai_settings_from_dot_env()

    print("Starting ChatBot...")
    bot = ChatBot(
        pyserini_config=PyseriniConfig(parsed.k1, parsed.b, parsed.hits),
        openai_config=OpenAIConfig(api_key, org_id),
    )
    bot.chat()


if __name__ == '__main__':
    main()
20 changes: 20 additions & 0 deletions pyserini/demo/aclchatgpt/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Talking to ACL Anthology with GPT

By default, GPT-3 text-davinci-003 is used. You can change this in `chatbot.py`.

## Environment Variables
You will need to create a `.env` file in the directory from which you run the chatbot.
The `.env` file should contain the following:
```
OPENAI_API_KEY="<your key>"
OPENAI_ORG_ID="<your org id>"
```

## Setup

1. Follow the instructions in [Indexing the ACL Anthology with Anserini](https://github.com/castorini/pyserini/blob/master/docs/working-with-acl-anthology.md) to setup the project and generate a `lucene-index-acl-paragraph` index.
2. Copy the generated `lucene-index-acl-paragraph` index from the `acl-anthology` folder to `pyserini/indexes`
3. You will need Semantic Kernel as well.
- `pip3 install --upgrade semantic-kernel`
4. Start the chatbot with `python -m pyserini.demo.aclchatgpt.chatbot`
5. Start chatting!
75 changes: 75 additions & 0 deletions pyserini/demo/aclchatgpt/skill.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) Microsoft. All rights reserved.

import json
import logging
from dataclasses import dataclass

import aiohttp
import requests

from semantic_kernel.orchestration.sk_context import SKContext
from typing import Callable, Optional, Tuple, Union
from semantic_kernel.skill_definition import sk_function, sk_function_context_parameter
from pyserini.search import LuceneSearcher, FaissSearcher, AutoQueryEncoder

@dataclass
class PyseriniConfig:
    """Retrieval settings passed to the Pyserini search skill."""
    # BM25 k1 parameter; when either k1 or b is None, the index defaults apply.
    k1: Optional[float] = None
    # BM25 b parameter; when either k1 or b is None, the index defaults apply.
    b: Optional[float] = None
    # Number of hits to retrieve per query.
    hits: Optional[int] = 1

class PyseriniSkill:
    """
    A skill that uses Pyserini to search a corpus of documents.

    Usage:
        kernel.import_skill(PyseriniSkill(), "http")

    Examples:

        {{pyserini.search $query}}
    """

    def __init__(self, pyserini_config: PyseriniConfig):
        """Open the ACL paragraph index and configure BM25.

        :param pyserini_config: BM25 parameters and the number of hits to return.
        """
        self.lang = 'en'
        self.searcher = LuceneSearcher('indexes/lucene-index-acl-paragraph')
        self.searcher.set_language(self.lang)
        # Only override BM25 when both parameters were explicitly provided.
        if pyserini_config.k1 is not None and pyserini_config.b is not None:
            self.searcher.set_bm25(pyserini_config.k1, pyserini_config.b)
            self.retriever_name = f'BM25 (k1={pyserini_config.k1}, b={pyserini_config.b})'
        else:
            self.retriever_name = 'BM25'
        self.hits = pyserini_config.hits

    @sk_function(description="Searches a corpus of documents using Pyserini using the specified query.", name="search")
    @sk_function_context_parameter(name="url", description="The url of the request")
    async def search(self, query: str, context: SKContext) -> str:
        """
        Searches a corpus of documents using Pyserini using the specified query.
        Return the response body as a string.

        params:
            query: The query to search for.
            context: The SKContext containing the url of the request.
        returns:
            The response body as a string, one "docid:...,doc:..." entry per hit,
            or "No results found." when the query is empty or nothing matched.
        """
        _, url = context.variables.get("url")
        if not url:
            raise ValueError("url cannot be `None` or empty")

        # Bug fix: the original indexed search_results[0] unconditionally,
        # raising IndexError for an empty query or an empty hit list.
        if not query:
            return "No results found."

        hits = self.searcher.search(query, k=self.hits)
        if not hits:
            return "No results found."

        # Return every retrieved hit (the original silently dropped all but
        # the first, defeating the configured hit count).
        return "\n".join(
            f"docid:{hit.docid},doc:{self.searcher.doc(hit.docid).contents()}"
            for hit in hits
        )