Skip to content

Commit

Permalink
Bump size default from 5 to 20 (#252)
Browse files Browse the repository at this point in the history
* Bumps size default from 5 to 20
* Limits source documents sent via websockets to 5
* Sends all source documents to the chat metrics endpoint
  • Loading branch information
bmquinn authored Aug 26, 2024
1 parent 5597f6a commit 730c6ca
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 49 deletions.
2 changes: 1 addition & 1 deletion chat/src/event_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
K_VALUE = 40
MAX_K = 100
MAX_TOKENS = 1000
SIZE = 5
SIZE = 20
TEMPERATURE = 0.2
TEXT_KEY = "id"
VERSION = "2024-02-01"
Expand Down
4 changes: 2 additions & 2 deletions chat/src/helpers/hybrid_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ def filter(query: dict):
}
}

def hybrid_query(query: str, model_id: str, vector_field: str = "embedding", k: int = 10, **kwargs: Any):
def hybrid_query(query: str, model_id: str, vector_field: str = "embedding", k: int = 40, **kwargs: Any):
result = {
"size": kwargs.get("size", 5),
"size": kwargs.get("size", 20),
"query": {
"hybrid": {
"queries": [
Expand Down
13 changes: 10 additions & 3 deletions chat/src/helpers/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,20 @@ def get_and_send_original_question(docs):
source_document = doc.metadata.copy()
source_document["content"] = doc.page_content
source_documents.append(source_document)


socket_message = {
"question": self.config.question,
"source_documents": source_documents[:5]
}
self.config.socket.send(socket_message)

original_question = {
"question": self.config.question,
"source_documents": source_documents,
"source_documents": source_documents
}
self.config.socket.send(original_question)
self.original_question = original_question

docs["source_documents"] = source_documents
return docs

return RunnablePassthrough(get_and_send_original_question)
Expand Down
59 changes: 17 additions & 42 deletions chat/test/helpers/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,7 @@ def setUp(self):
self.question = "What is your name?"
self.original_question = {
"question": self.question,
"source_documents": [
{
"accession_number": "SourceDoc:1",
"api_link": "https://api.dc.library.northwestern.edu/api/v2/works/881e8cae-67be-4e04-9970-7eafb52b2c5c",
"canonical_link": "https://dc.library.northwestern.edu/items/881e8cae-67be-4e04-9970-7eafb52b2c5c",
"title": "Source Document One!"
},
{
"accession_number": "SourceDoc:2",
"api_link": "https://api.dc.library.northwestern.edu/api/v2/works/ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1",
"canonical_link": "https://dc.library.northwestern.edu/items/ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1",
"title": "Source Document Two!"
},
{
"accession_number": "SourceDoc:3",
"api_link": "https://api.dc.library.northwestern.edu/api/v2/works/11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5",
"canonical_link": "https://dc.library.northwestern.edu/items/11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5",
"title": "Source Document Three!"
},
{
"accession_number": "SourceDoc:4",
"api_link": "https://api.dc.library.northwestern.edu/api/v2/works/211eeeca-d56e-4c6e-9123-1612d72258f9",
"canonical_link": "https://dc.library.northwestern.edu/items/211eeeca-d56e-4c6e-9123-1612d72258f9",
"title": "Source Document Four!"
},
{
"accession_number": "SourceDoc:5",
"api_link": "https://api.dc.library.northwestern.edu/api/v2/works/10e45e7a-8011-4ac5-97df-efa6a5439d0e",
"canonical_link": "https://dc.library.northwestern.edu/items/10e45e7a-8011-4ac5-97df-efa6a5439d0e",
"title": "Source Document Five!"
}
],
"source_documents": self.generate_source_documents(20),
}
self.event = {
"body": json.dumps({
Expand All @@ -75,23 +44,29 @@ def setUp(self):
self.response = {
"output_text": "This is a test response.",
}

def generate_source_documents(self, count):
    """Build a list of ``count`` synthetic source-document dicts.

    Documents are numbered from 1. Each one carries an accession number,
    an API link, a canonical link and a title derived from its number; the
    identifier at the end of both links is the number left-padded with
    zeros to 32 characters.
    """
    documents = []
    for number in range(1, count + 1):
        # Both links share the same zero-padded 32-character identifier.
        padded_id = f"{number:0>32}"
        documents.append(
            {
                "accession_number": f"SourceDoc:{number}",
                "api_link": f"https://api.dc.library.northwestern.edu/api/v2/works/{padded_id}",
                "canonical_link": f"https://dc.library.northwestern.edu/items/{padded_id}",
                "title": f"Source Document {number}!",
            }
        )
    return documents

def test_debug_response(self):
result = debug_response(self.config, self.response, self.original_question)

self.assertEqual(result["k"], 40)
self.assertEqual(result["question"], self.question)
self.assertEqual(result["ref"], "test")
self.assertEqual(result["size"], 5)
self.assertEqual(result["size"], 20)
self.assertEqual(len(result["source_documents"]), 20)
self.assertEqual(
result["source_documents"],
[
"https://api.dc.library.northwestern.edu/api/v2/works/881e8cae-67be-4e04-9970-7eafb52b2c5c",
"https://api.dc.library.northwestern.edu/api/v2/works/ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1",
"https://api.dc.library.northwestern.edu/api/v2/works/11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5",
"https://api.dc.library.northwestern.edu/api/v2/works/211eeeca-d56e-4c6e-9123-1612d72258f9",
"https://api.dc.library.northwestern.edu/api/v2/works/10e45e7a-8011-4ac5-97df-efa6a5439d0e"
]
[doc["api_link"] for doc in self.original_question["source_documents"]]
)

def test_token_usage(self):
Expand All @@ -101,8 +76,8 @@ def test_token_usage(self):
"answer": 12,
"prompt": 329,
"question": 5,
"source_documents": 527,
"total": 873
"source_documents": 1602,
"total": 1948
}

self.assertEqual(result, expected_result)
Expand Down
2 changes: 1 addition & 1 deletion chat/test/test_event_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_attempt_override_without_superuser_status(self):
"k": 40,
"openai_api_version": "2024-02-01",
"question": "test question",
"size": 5,
"size": 20,
"ref": "test ref",
"temperature": 0.2,
"text_key": "id",
Expand Down

0 comments on commit 730c6ca

Please sign in to comment.