Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deploy DC API to production #224

Merged
merged 18 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-node.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 16.x
node-version: 20.x
cache: "npm"
cache-dependency-path: 'node/package-lock.json'
- run: npm ci
Expand Down
2 changes: 1 addition & 1 deletion .tool-versions
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
nodejs 16.14.0
nodejs 20.15.0
java corretto-19.0.1.10.1
aws-sam-cli 1.107.0
python 3.10.5
4 changes: 4 additions & 0 deletions chat/src/handlers/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ def handler(event, context):
if not config.is_logged_in:
config.socket.send({"type": "error", "message": "Unauthorized"})
return {"statusCode": 401, "body": "Unauthorized"}

if config.question is None or config.question == "":
config.socket.send({"type": "error", "message": "Question cannot be blank"})
return {"statusCode": 400, "body": "Question cannot be blank"}

debug_message = config.debug_message()
if config.debug_mode:
Expand Down
29 changes: 2 additions & 27 deletions chat/src/handlers/opensearch_neural_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from langchain_core.vectorstores import VectorStore
from opensearchpy import OpenSearch
from typing import Any, List, Tuple

from helpers.hybrid_query import hybrid_query

class OpenSearchNeuralSearch(VectorStore):
"""Read-only OpenSearch vectorstore with neural search."""
Expand Down Expand Up @@ -40,33 +40,8 @@ def similarity_search_with_score(
self, query: str, k: int = 10, subquery: Any = None, **kwargs: Any
) -> List[Tuple[Document, float]]:
"""Return docs most similar to query."""
dsl = {
"size": k,
"query": {
"hybrid": {
"queries": [
{
"neural": {
self.vector_field: {
"query_text": query,
"model_id": self.model_id,
"k": k,
}
}
}
]
}
},
}

if subquery:
dsl["query"]["hybrid"]["queries"].append(subquery)

for key, value in kwargs.items():
dsl[key] = value

dsl = hybrid_query(query=query, model_id=self.model_id, vector_field=self.vector_field, k=k, subquery=subquery, **kwargs)
response = self.client.search(index=self.index, body=dsl, params={"search_pipeline": self.search_pipeline} if self.search_pipeline else None)

documents_with_scores = [
(
Document(
Expand Down
71 changes: 71 additions & 0 deletions chat/src/helpers/hybrid_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typing import Any

def filter(query: dict):
    """Wrap ``query`` in a ``bool``/``must`` clause with fixed constraints.

    NOTE: this name shadows the builtin ``filter`` inside this module; it is
    kept because ``hybrid_query`` calls it under this name.

    Args:
        query: A query clause (dict). It is placed first in the ``must``
            list as-is (not copied).

    Returns:
        A dict of the form ``{"bool": {"must": [query, <visibility>, <published>]}}``
        where the two extra clauses require ``visibility`` to be one of
        ``"Public"``/``"Institution"`` and ``published`` to be ``True``.
    """
    visibility_clause = {"terms": {"visibility": ["Public", "Institution"]}}
    published_clause = {"term": {"published": True}}
    return {"bool": {"must": [query, visibility_clause, published_clause]}}

def hybrid_query(query: str, model_id: str, vector_field: str = "embedding", k: int = 10, subquery: Any = None, **kwargs: Any):
    """Build the request body for a hybrid (keyword + neural) search.

    Every sub-query is wrapped with ``filter`` so the visibility/published
    constraints apply to each of them.

    Args:
        query: The user's text; used both as the ``query_string`` query and
            as the ``query_text`` of the neural clause.
        model_id: Model id placed in the neural clause.
        vector_field: Name of the field the neural clause targets.
        k: Used for both the top-level ``size`` and the neural clause's ``k``.
        subquery: Optional extra query clause; when truthy it is appended as
            a third sub-query and the three-element weight list is used.
        **kwargs: Copied onto the top level of the body last, so a key such
            as ``size`` or ``query`` passed here replaces the generated one.

    Returns:
        A dict with ``size``, ``query`` and ``search_pipeline`` keys (plus
        any ``kwargs``).
    """
    keyword_clause = filter({
        "query_string": {
            "default_operator": "AND",
            "fields": ["title^5", "all_controlled_labels", "all_ids^5"],
            "query": query,
        }
    })
    neural_clause = filter({
        "neural": {
            vector_field: {
                "k": k,
                "model_id": model_id,
                "query_text": query,
            }
        }
    })
    clauses = [keyword_clause, neural_clause]

    # One weight per sub-query, in the same order as ``clauses``.
    if subquery:
        clauses.append(filter(subquery))
        weights = [0.5, 0.3, 0.2]
    else:
        weights = [0.7, 0.3]

    normalization_processor = {
        "normalization-processor": {
            "combination": {
                "parameters": {"weights": weights},
                "technique": "arithmetic_mean",
            },
            "normalization": {"technique": "l2"},
        }
    }

    body = {
        "size": k,
        "query": {"hybrid": {"queries": clauses}},
        "search_pipeline": {
            "phase_results_processors": [normalization_processor],
        },
    }

    # Caller-supplied top-level keys win over the generated ones.
    body.update(kwargs)
    return body


1 change: 0 additions & 1 deletion chat/src/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def opensearch_vector_store(region_name=os.getenv("AWS_REGION")):
endpoint=os.getenv("OPENSEARCH_ENDPOINT"),
connection_class=RequestsHttpConnection,
http_auth=awsauth,
search_pipeline=prefix("dc-v2-work-pipeline"),
text_field= "id"
)
return docsearch
Expand Down
2 changes: 1 addition & 1 deletion chat/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ Resources:
- logs:PutLogEvents
Resource: !Sub "${ChatMetricsLog.Arn}:*"
Metadata:
BuildMethod: nodejs18.x
BuildMethod: nodejs20.x
ChatMetricsLog:
Type: AWS::Logs::LogGroup
Properties:
Expand Down
30 changes: 26 additions & 4 deletions chat/test/handlers/test_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ class TestHandler(TestCase):
def test_handler_unauthorized(self):
    # The event carries only a socket and nothing is patched to mark the
    # caller as logged in, so the handler is expected to answer 401.
    socket = Websocket(client=MockClient(), endpoint_url="test", connection_id="test", ref="test")
    result = handler({"socket": socket}, MockContext())
    self.assertEqual(result, {'body': 'Unauthorized', 'statusCode': 401})

@patch.object(ApiToken, 'is_logged_in')
def test_handler_success(self, mock_is_logged_in):
mock_is_logged_in.return_value = True
event = {"socket": Websocket(client=MockClient(), endpoint_url="test", connection_id="test", ref="test")}
event = {"socket": Websocket(client=MockClient(), endpoint_url="test", connection_id="test", ref="test"), "body": '{"question": "Question?"}' }
self.assertEqual(handler(event, MockContext()), {'statusCode': 200})

@patch.object(ApiToken, 'is_logged_in')
Expand All @@ -51,7 +51,7 @@ def test_handler_debug_mode(self, mock_is_debug_enabled, mock_is_logged_in, mock
mock_is_superuser.return_value = True
mock_client = MockClient()
mock_websocket = Websocket(client=mock_client, endpoint_url="test", connection_id="test", ref="test")
event = {"socket": mock_websocket, "debug": True}
event = {"socket": mock_websocket, "debug": True, "body": '{"question": "Question?"}' }
handler(event, MockContext())
response = json.loads(mock_client.received_data)
self.assertEqual(response["type"], "debug")
Expand All @@ -65,7 +65,29 @@ def test_handler_debug_mode_for_superusers_only(self, mock_is_debug_enabled, moc
mock_is_superuser.return_value = False
mock_client = MockClient()
mock_websocket = Websocket(client=mock_client, endpoint_url="test", connection_id="test", ref="test")
event = {"socket": mock_websocket, "debug": True}
event = {"socket": mock_websocket, "debug": True, "body": '{"question": "Question?"}' }
handler(event, MockContext())
response = json.loads(mock_client.received_data)
self.assertEqual(response["type"], "error")

@patch.object(ApiToken, 'is_logged_in')
def test_handler_question_missing(self, mock_is_logged_in):
    # Logged-in caller, but the event has no "body" at all, so no question
    # is supplied; the handler should send the blank-question error.
    mock_is_logged_in.return_value = True
    client = MockClient()
    socket = Websocket(client=client, endpoint_url="test", connection_id="test", ref="test")
    handler({"socket": socket}, MockContext())
    payload = json.loads(client.received_data)
    self.assertEqual(payload["type"], "error")
    self.assertEqual(payload["message"], "Question cannot be blank")

@patch.object(ApiToken, 'is_logged_in')
def test_handler_question_blank(self, mock_is_logged_in):
    # Logged-in caller whose body contains an explicitly empty question.
    # The key must be spelled "question": with a misspelled key the body
    # simply has no question, which only repeats the "missing" test and
    # never reaches the handler's empty-string check.
    mock_is_logged_in.return_value = True
    mock_client = MockClient()
    mock_websocket = Websocket(client=mock_client, endpoint_url="test", connection_id="test", ref="test")
    event = {"socket": mock_websocket, "body": '{"question": ""}'}
    handler(event, MockContext())
    response = json.loads(mock_client.received_data)
    self.assertEqual(response["type"], "error")
    self.assertEqual(response["message"], "Question cannot be blank")
26 changes: 26 additions & 0 deletions chat/test/helpers/test_hybrid_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import sys
from unittest import TestCase

# Extend the import path BEFORE importing project modules: appending after
# the import cannot help resolve ``helpers.hybrid_query``.
sys.path.append('./src')

from helpers.hybrid_query import hybrid_query  # noqa: E402  (must follow the path tweak)

class TestFunction(TestCase):
    # Checks the structure of the request body built by ``hybrid_query``.

    def test_hybrid_query(self):
        # With a subquery there must be three sub-queries, each wrapped in a
        # bool/must whose first element is the original clause and which
        # also contains the visibility and published constraints.
        subquery = {"term": {"title": {"value": "The Title"}}}
        dsl = hybrid_query("Question?", "MODEL_ID", k=10, subquery=subquery)
        subject = dsl["query"]["hybrid"]["queries"]

        # (extractor, expected value) per sub-query, in sub-query order.
        checks = [
            (lambda clause: clause["query_string"]["query"], "Question?"),
            (lambda clause: clause["neural"]["embedding"]["model_id"], "MODEL_ID"),
            (lambda clause: clause["term"]["title"]["value"], "The Title"),
        ]

        self.assertEqual(len(subject), 3)

        visibility_clause = {"terms": {"visibility": ["Public", "Institution"]}}
        published_clause = {"term": {"published": True}}

        for (lookup, expected), wrapped in zip(checks, subject):
            queries = wrapped["bool"]["must"]
            self.assertEqual(lookup(queries[0]), expected)
            self.assertIn(visibility_clause, queries)
            self.assertIn(published_clause, queries)
Loading
Loading