Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Staging to main - bug fixes #959

Merged
merged 14 commits into from
Dec 19, 2024
21 changes: 2 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,14 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b
### Local deployment
#### Running through docker-compose
By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations.
Accoroding to enviornment we are configuring the models which is indicated by VITE_LLM_MODELS_PROD variable we can configure model based on our need.
According to the environment, we configure the models indicated by the VITE_LLM_MODELS_PROD variable; models can be configured based on our needs.
EX:
```env
VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash"
```

if you only want OpenAI:
```env
VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o"
```

Backend ENV
```env
OPENAI_API_KEY="your-openai-key"
```

if you only want Diffbot:
```env
VITE_LLM_MODELS_PROD="diffbot"
```

Backend ENV
```env
DIFFBOT_API_KEY="your-diffbot-key"
```

You can then run Docker Compose to build and start all components:
```bash
docker-compose up --build
Expand Down Expand Up @@ -89,6 +71,7 @@ VITE_CHAT_MODES=""
If however you want to specify the only vector mode or only graph mode you can do that by specifying the mode in the env:
```env
VITE_CHAT_MODES="vector,graph"
```

#### Running Backend and Frontend separately (dev environment)
Expand Down
5 changes: 3 additions & 2 deletions backend/example.env
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ NEO4J_USER_AGENT=""
ENABLE_USER_AGENT = ""
LLM_MODEL_CONFIG_model_version=""
ENTITY_EMBEDDING="" # True or False
DUPLICATE_SCORE_VALUE = ""
DUPLICATE_TEXT_DISTANCE = ""
DUPLICATE_SCORE_VALUE =0.97
DUPLICATE_TEXT_DISTANCE =3
DEFAULT_DIFFBOT_CHAT_MODEL="openai_gpt_4o" # whichever model is specified here needs a matching config entry in the format below
#examples
LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key"
LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key"
Expand Down
51 changes: 28 additions & 23 deletions backend/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,19 @@
from src.communities import create_communities
from src.neighbours import get_neighbour_nodes
import json
from typing import List, Mapping, Union
from typing import List
from starlette.middleware.sessions import SessionMiddleware
import google_auth_oauthlib.flow
from google.oauth2.credentials import Credentials
import os
from src.logger import CustomLogger
from datetime import datetime, timezone
import time
import gc
from Secweb import SecWeb
from Secweb.StrictTransportSecurity import HSTS
from Secweb.ContentSecurityPolicy import ContentSecurityPolicy
from Secweb.XContentTypeOptions import XContentTypeOptions
from Secweb.XFrameOptions import XFrame
from fastapi.middleware.gzip import GZipMiddleware
from src.ragas_eval import *
from starlette.types import ASGIApp, Message, Receive, Scope, Send
import gzip
from langchain_neo4j import Neo4jGraph

logger = CustomLogger()
Expand Down Expand Up @@ -493,11 +488,13 @@ async def connect(uri=Form(), userName=Form(), password=Form(), database=Form())
start = time.time()
graph = create_graph_database_connection(uri, userName, password, database)
result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database)
gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
end = time.time()
elapsed_time = end - start
json_obj = {'api_name':'connect','db_url':uri, 'userName':userName, 'database':database,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'}
logger.log_struct(json_obj, "INFO")
result['elapsed_api_time'] = f'{elapsed_time:.2f}'
result['gcs_file_cache'] = gcs_file_cache
return create_api_response('Success',data=result)
except Exception as e:
job_status = "Failed"
Expand Down Expand Up @@ -571,6 +568,8 @@ async def generate():
uri = url
if " " in url:
uri= url.replace(" ","+")
graph = create_graph_database_connection(uri, userName, decoded_password, database)
graphDb_data_Access = graphDBdataAccess(graph)
while True:
try:
if await request.is_disconnected():
Expand All @@ -579,8 +578,6 @@ async def generate():
# get the current status of document node

else:
graph = create_graph_database_connection(uri, userName, decoded_password, database)
graphDb_data_Access = graphDBdataAccess(graph)
result = graphDb_data_Access.get_current_status_document_node(file_name)
if len(result) > 0:
status = json.dumps({'fileName':file_name,
Expand Down Expand Up @@ -968,22 +965,30 @@ async def fetch_chunktext(
gc.collect()


@app.post("/backend_connection_configuation")
async def backend_connection_configuation():
@app.post("/backend_connection_configuration")
async def backend_connection_configuration():
try:
graph = Neo4jGraph()
logging.info(f'login connection status of object: {graph}')
if graph is not None:
graph_connection = True
isURI = os.getenv('NEO4J_URI')
isUsername= os.getenv('NEO4J_USERNAME')
isDatabase= os.getenv('NEO4J_DATABASE')
isPassword= os.getenv('NEO4J_PASSWORD')
encoded_password = encode_password(isPassword)
graphDb_data_Access = graphDBdataAccess(graph)
gds_status = graphDb_data_Access.check_gds_version()
write_access = graphDb_data_Access.check_account_access(database=isDatabase)
return create_api_response('Success',message=f"Backend connection successful",data={'graph_connection':graph_connection,'uri':isURI,'user_name':isUsername,'database':isDatabase,'password':encoded_password,'gds_status':gds_status,'write_access':write_access})
uri = os.getenv('NEO4J_URI')
username= os.getenv('NEO4J_USERNAME')
database= os.getenv('NEO4J_DATABASE')
password= os.getenv('NEO4J_PASSWORD')
gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
if all([uri, username, database, password]):
print(f'uri:{uri}, usrName:{username}, database :{database}, password: {password}')
graph = Neo4jGraph()
logging.info(f'login connection status of object: {graph}')
if graph is not None:
graph_connection = True
encoded_password = encode_password(password)
graphDb_data_Access = graphDBdataAccess(graph)
result = graphDb_data_Access.connection_check_and_get_vector_dimensions(database)
result["graph_connection"] = graph_connection
result["uri"] = uri
result["user_name"] = username
result["database"] = database
result["password"] = encoded_password
result['gcs_file_cache'] = gcs_file_cache
return create_api_response('Success',message=f"Backend connection successful",data=result)
else:
graph_connection = False
return create_api_response('Success',message=f"Backend connection is not successful",data=graph_connection)
Expand Down
6 changes: 6 additions & 0 deletions backend/src/entities/source_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ class sourceNode:
gcsBucketFolder:str=None
gcsProjectId:str=None
awsAccessKeyId:str=None
chunkNodeCount:int=None
chunkRelCount:int=None
entityNodeCount:int=None
entityEntityRelCount:int=None
communityNodeCount:int=None
communityRelCount:int=None
node_count:int=None
relationship_count:str=None
model:str=None
Expand Down
111 changes: 61 additions & 50 deletions backend/src/graphDB_dataAccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,24 @@ def create_source_node(self, obj_source_node:sourceNode):
d.relationshipCount = $r_count, d.model= $model, d.gcsBucket=$gcs_bucket,
d.gcsBucketFolder= $gcs_bucket_folder, d.language= $language,d.gcsProjectId= $gcs_project_id,
d.is_cancelled=False, d.total_chunks=0, d.processed_chunk=0,
d.access_token=$access_token""",
d.access_token=$access_token,
d.chunkNodeCount=$chunkNodeCount,d.chunkRelCount=$chunkRelCount,
d.entityNodeCount=$entityNodeCount,d.entityEntityRelCount=$entityEntityRelCount,
d.communityNodeCount=$communityNodeCount,d.communityRelCount=$communityRelCount""",
{"fn":obj_source_node.file_name, "fs":obj_source_node.file_size, "ft":obj_source_node.file_type, "st":job_status,
"url":obj_source_node.url,
"awsacc_key_id":obj_source_node.awsAccessKeyId, "f_source":obj_source_node.file_source, "c_at":obj_source_node.created_at,
"u_at":obj_source_node.created_at, "pt":0, "e_message":'', "n_count":0, "r_count":0, "model":obj_source_node.model,
"gcs_bucket": obj_source_node.gcsBucket, "gcs_bucket_folder": obj_source_node.gcsBucketFolder,
"language":obj_source_node.language, "gcs_project_id":obj_source_node.gcsProjectId,
"access_token":obj_source_node.access_token})
"access_token":obj_source_node.access_token,
"chunkNodeCount":obj_source_node.chunkNodeCount,
"chunkRelCount":obj_source_node.chunkRelCount,
"entityNodeCount":obj_source_node.entityNodeCount,
"entityEntityRelCount":obj_source_node.entityEntityRelCount,
"communityNodeCount":obj_source_node.communityNodeCount,
"communityRelCount":obj_source_node.communityRelCount
})
except Exception as e:
error_message = str(e)
logging.info(f"error_message = {error_message}")
Expand Down Expand Up @@ -108,7 +118,7 @@ def update_source_node(self, obj_source_node:sourceNode):
self.graph.query(query,param)
except Exception as e:
error_message = str(e)
self.update_exception_db(self.file_name,error_message)
self.update_exception_db(self,self.file_name,error_message)
raise Exception(error_message)

def get_source_list(self):
Expand Down Expand Up @@ -463,51 +473,52 @@ def update_node_relationship_count(self,document_name):
param = {"document_name": document_name}
result = self.execute_query(NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY, param)
response = {}
for record in result:
filename = record["filename"]
chunkNodeCount = record["chunkNodeCount"]
chunkRelCount = record["chunkRelCount"]
entityNodeCount = record["entityNodeCount"]
entityEntityRelCount = record["entityEntityRelCount"]
if (not document_name) and (community_flag):
communityNodeCount = record["communityNodeCount"]
communityRelCount = record["communityRelCount"]
else:
communityNodeCount = 0
communityRelCount = 0
nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
update_query = """
MATCH (d:Document {fileName: $filename})
SET d.chunkNodeCount = $chunkNodeCount,
d.chunkRelCount = $chunkRelCount,
d.entityNodeCount = $entityNodeCount,
d.entityEntityRelCount = $entityEntityRelCount,
d.communityNodeCount = $communityNodeCount,
d.communityRelCount = $communityRelCount,
d.nodeCount = $nodeCount,
d.relationshipCount = $relationshipCount
"""
self.execute_query(update_query,{
"filename": filename,
"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
})

response[filename] = {"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
}

if result:
for record in result:
filename = record.get("filename",None)
chunkNodeCount = int(record.get("chunkNodeCount",0))
chunkRelCount = int(record.get("chunkRelCount",0))
entityNodeCount = int(record.get("entityNodeCount",0))
entityEntityRelCount = int(record.get("entityEntityRelCount",0))
if (not document_name) and (community_flag):
communityNodeCount = int(record.get("communityNodeCount",0))
communityRelCount = int(record.get("communityRelCount",0))
else:
communityNodeCount = 0
communityRelCount = 0
nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
update_query = """
MATCH (d:Document {fileName: $filename})
SET d.chunkNodeCount = $chunkNodeCount,
d.chunkRelCount = $chunkRelCount,
d.entityNodeCount = $entityNodeCount,
d.entityEntityRelCount = $entityEntityRelCount,
d.communityNodeCount = $communityNodeCount,
d.communityRelCount = $communityRelCount,
d.nodeCount = $nodeCount,
d.relationshipCount = $relationshipCount
"""
self.execute_query(update_query,{
"filename": filename,
"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
})

response[filename] = {"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
}

return response
4 changes: 3 additions & 1 deletion backend/src/graph_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def get_graph_results(uri, username, password,database,document_names):

def get_chunktext_results(uri, username, password, database, document_name, page_no):
"""Retrieves chunk text, position, and page number from graph data with pagination."""
driver = None
try:
logging.info("Starting chunk text query process")
offset = 10
Expand Down Expand Up @@ -254,4 +255,5 @@ def get_chunktext_results(uri, username, password, database, document_name, page
logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}")
raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e
finally:
driver.close()
if driver:
driver.close()
Loading
Loading