Skip to content

Commit

Permalink
Merge pull request #653 from openchatai/feat/neural_search
Browse files Browse the repository at this point in the history
Feat/neural search
  • Loading branch information
codebanesr authored Feb 22, 2024
2 parents 7ed4285 + 489338c commit c26d857
Show file tree
Hide file tree
Showing 20 changed files with 281 additions and 140 deletions.
Binary file added .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion copilot-widget/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ <h2>
</style>
<div id="opencopilot-root"></div>
<script>
const token = "EvWRh4cqyGIgct4c";
const token = "NtITS4Z07ZrdctTN";
const apiUrl = "http://localhost:8888/backend";
const socketUrl = "http://localhost:8888";
</script>
Expand Down
2 changes: 1 addition & 1 deletion copilot-widget/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@openchatai/copilot-widget",
"private": false,
"version": "2.5.3",
"version": "2.5.4",
"type": "module",
"scripts": {
"dev": "vite",
Expand Down
Empty file modified copilot-widget/scripts/update-lib.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion dashboard/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"dependencies": {
"@hookform/resolvers": "^3.3.1",
"@kbox-labs/react-echarts": "^1.0.3",
"@openchatai/copilot-widget": "^2.5.3",
"@openchatai/copilot-widget": "^2.5.4",
"@radix-ui/react-accordion": "^1.1.2",
"@radix-ui/react-alert-dialog": "^1.0.5",
"@radix-ui/react-avatar": "^1.0.4",
Expand Down
8 changes: 4 additions & 4 deletions dashboard/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion dashboard/public/pilot.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llm-server/models/repository/action_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def update_action(action_id: str, data: ActionDTO) -> Action:
action.name = data.name
action.description = data.description
action.operation_id = data.operation_id
action.base_uri = data.api_endpoint
action.api_endpoint = data.api_endpoint
action.request_type = data.request_type
action.payload = data.payload
action.updated_at = datetime.datetime.utcnow()
Expand Down
3 changes: 2 additions & 1 deletion llm-server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,5 @@ wrapt==1.16.0
wsproto==1.2.0
yarl==1.9.4
zipp==3.17.0
aioredis==2.0.1
aioredis==2.0.1
scrapingbee==2.0.1
1 change: 0 additions & 1 deletion llm-server/routes/action/action_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def add_action(chatbot_id):
@action.route("/bot/<string:chatbot_id>/action/<string:action_id>", methods=["PATCH"])
def update_single_action(chatbot_id: str, action_id: str):
action_dto = ActionDTO(bot_id=chatbot_id, **request.get_json())

# Todo make sure either both or non go in
saved_action = update_action(action_id, action_dto)
action_vector_service.update_action_by_operation_id(action_dto)
Expand Down
16 changes: 8 additions & 8 deletions llm-server/routes/flow/api_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@

class ApiInfo:
def __init__(
self,
endpoint: Optional[str],
method: Optional[str],
path_params: Any,
query_params: Any,
body_schema: Any,
self,
endpoint: Optional[str],
method: Optional[str],
path_params: Any,
query_params: Any,
body_schema: Any,
) -> None:
self.endpoint = endpoint
self.method = method
self.path_params: Any = {"properties": path_params}
self.query_params: Any = {"properties": query_params}
self.path_params: Any = path_params
self.query_params: Any = query_params
self.body_schema = body_schema
4 changes: 2 additions & 2 deletions llm-server/routes/flow/generate_openapi_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ async def generate_api_payload(
body_schema=body_schema,
)

if api_info.path_params["properties"]:
if api_info.path_params:
api_info.path_params = await gen_params_from_schema(
json.dumps(api_info.path_params, separators=(",", ":")),
text,
prev_api_response,
current_state,
)

if api_info.query_params["properties"]:
if api_info.query_params:
api_info.query_params = await gen_params_from_schema(
json.dumps(api_info.query_params, separators=(",", ":")),
text,
Expand Down
4 changes: 3 additions & 1 deletion llm-server/routes/flow/utils/process_conversation_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ def is_it_informative_or_actionable(
messages.extend(chat_history[-6:])
messages.append(HumanMessage(content=current_message))
messages.append(
HumanMessage(content="Return the corresponding json for the last user input")
HumanMessage(
content="Return the corresponding json for the last user input, without any commentary."
)
)

content = cast(str, chat(messages=messages).content)
Expand Down
107 changes: 107 additions & 0 deletions llm-server/routes/search/search_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import uuid
from shared.utils.opencopilot_utils.get_embeddings import get_embeddings
from utils.llm_consts import VectorCollections, initialize_qdrant_client
from qdrant_client import models
from typing import Dict, List, Optional
import operator
from copy import deepcopy

client = initialize_qdrant_client()
embedding = get_embeddings()


# Data structure (you might want to define a custom class/dataclass)
class Item:
    """A single command-bar entry to be embedded and indexed for neural search.

    Carries the page title, the heading text shown to the user, and the
    anchor id of that heading.
    """

    def __init__(self, title: str, heading_text: str, heading_id: str) -> None:
        self.title: str = title
        self.heading_text: str = heading_text
        self.heading_id: str = heading_id


def add_cmdbar_data(items: List[Item], metadata: Dict[str, str]) -> None:
    """Embed command-bar items and upsert them into the neural-search collection.

    Each item becomes one Qdrant point with two named vectors ("title" and
    "description") and a payload of ``metadata`` merged with the item's own
    title / description / heading_id.
    """
    titles = [item.title for item in items]
    headings = [item.heading_text for item in items]

    # TODO: remove this shortcut — the HTML title is often identical across
    # items on a page, so embed it once and reuse instead of re-embedding.
    if len(titles) > 3 and (titles[0] == titles[1] == titles[2] == titles[3]):
        shared_title_embedding = embedding.embed_query(titles[0])
        title_embeddings = [shared_title_embedding] * len(titles)
    else:
        title_embeddings = embedding.embed_documents(titles)

    description_embeddings = embedding.embed_documents(headings)

    points = []  # Batch of points to insert in a single upsert call
    for index, item in enumerate(items):
        _metadata = deepcopy(metadata)
        _metadata["title"] = item.title
        _metadata["description"] = item.heading_text or ""
        _metadata["heading_id"] = item.heading_id or ""

        points.append(
            models.PointStruct(
                id=uuid.uuid4().hex,
                payload={"metadata": _metadata},
                # Bug fix: the title embedding must go under the "title"
                # named vector (and the description embedding under
                # "description") to match the collection's vector config and
                # the NamedVector names used by weighted_search; the original
                # code had them swapped.
                vector={
                    "title": title_embeddings[index],
                    "description": description_embeddings[index],
                },
            )
        )

    # Perform a single batch insert
    client.upsert(collection_name=VectorCollections.neural_search, points=points)


# Function to search with weights
def weighted_search(
    query: str, title_weight: float = 0.7, description_weight: float = 0.3
) -> List[models.ScoredPoint]:
    """Search both named vector spaces and blend the scores.

    Only points that match on *both* the "title" and "description" vectors
    are returned; each is scored as
    ``title_weight * title_score + description_weight * description_score``
    and the list is sorted by that combined score, highest first.
    """
    query_vector = embedding.embed_query(query)

    def _search(vector_name: str) -> List[models.ScoredPoint]:
        # Query a single named vector space of the neural-search collection.
        return client.search(
            collection_name=VectorCollections.neural_search,
            query_vector=models.NamedVector(name=vector_name, vector=query_vector),
            with_payload=True,
            with_vector=False,
        )

    # Index the description hits by point id for O(1) joins below.
    description_by_id = {hit.id: hit for hit in _search("description")}

    combined: List[models.ScoredPoint] = []
    for title_hit in _search("title"):
        description_hit = description_by_id.get(title_hit.id)
        if description_hit is None:
            # Points matched by only one vector space are dropped,
            # mirroring the original intersection semantics.
            continue
        blended_score = (
            title_weight * title_hit.score
            + description_weight * description_hit.score
        )
        combined.append(
            models.ScoredPoint(
                version=1,
                id=title_hit.id,
                payload=title_hit.payload,
                score=blended_score,
            )
        )

    return sorted(combined, key=lambda point: point.score, reverse=True)
10 changes: 6 additions & 4 deletions llm-server/shared/utils/opencopilot_utils/get_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

@lru_cache(maxsize=1)
def get_embeddings():
embedding_provider = os.environ.get("EMBEDDING_PROVIDER", EmbeddingProvider.OPENAI.value)
embedding_provider = os.environ.get(
"EMBEDDING_PROVIDER", EmbeddingProvider.OPENAI.value
)

if embedding_provider == EmbeddingProvider.azure.value:
deployment = os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL_NAME")
Expand All @@ -23,9 +25,9 @@ def get_embeddings():
return OpenAIEmbeddings(
deployment=deployment,
client=client,
chunk_size=8,
chunk_size=1000,
)

elif embedding_provider == EmbeddingProvider.openchat.value:
logger.info("Got ollama embedding provider", provider=embedding_provider)
return OllamaEmbeddings(base_url=f"{LOCAL_IP}:11434", model="openchat")
Expand All @@ -45,4 +47,4 @@ def get_embeddings():
raise ValueError(
f"Embedding service '{embedding_provider}' is not currently available. "
f"Available services: {available_providers}"
)
)
1 change: 1 addition & 0 deletions llm-server/utils/llm_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class VectorCollections:
flows = "flows"
actions = "actions"
knowledgebase = "knowledgebase"
neural_search = "neural_search"


class ChatStrategy:
Expand Down
24 changes: 24 additions & 0 deletions llm-server/utils/vector_store_setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from qdrant_client import QdrantClient, models
import os

import qdrant_client

from utils.llm_consts import VectorCollections, initialize_qdrant_client

vector_size = int(os.getenv("VECTOR_SIZE", "1536"))
Expand All @@ -24,10 +26,32 @@ def try_create_collection(name: str, vectors_config: models.VectorParams):
print(f"{name} collection already exists, ignoring")


# quick fix, connect to the try_create_collection function
def try_create_neural_search_collection(name: str, vector_params: models.VectorParams):
    """Create the multi-vector collection used by neural search, if absent.

    The collection carries two named vectors ("title" and "description") that
    share the same size/distance params, plus a keyword payload index on
    ``metadata.bot_id``. Any creation failure is treated as "already exists",
    matching the behavior of try_create_collection above.
    """
    named_vectors = {
        "title": vector_params,
        "description": vector_params,
    }
    try:
        client.create_collection(collection_name=name, vectors_config=named_vectors)
        client.create_payload_index(
            collection_name=name,
            field_name="metadata.bot_id",
            field_schema=models.PayloadFieldSchema.KEYWORD,
        )
    except Exception:
        # NOTE(review): broad except assumes any failure means the collection
        # already exists — kept identical to the sibling helper's behavior.
        print(f"{name} collection already exists, ignoring")


vector_params = models.VectorParams(size=vector_size, distance=models.Distance.COSINE)


def init_qdrant_collections():
    """Idempotently create every Qdrant collection the server relies on."""
    try_create_collection(VectorCollections.knowledgebase, vector_params)
    try_create_collection(VectorCollections.actions, vector_params)
    # Deduplicated: the flows collection was created twice in the original.
    try_create_collection(VectorCollections.flows, vector_params)
    try_create_neural_search_collection(VectorCollections.neural_search, vector_params)
Loading

0 comments on commit c26d857

Please sign in to comment.