Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix error on launch #340

Merged
merged 3 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gpt3discord.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from models.openai_model import Model


__version__ = "11.7.3"
__version__ = "11.8.0"


PID_FILE = Path("bot.pid")
Expand Down
31 changes: 12 additions & 19 deletions models/index_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
from langchain.llms import OpenAIChat
from langchain.memory import ConversationBufferMemory
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index.data_structs.data_structs import Node
from llama_index.data_structs.node import DocumentRelationship
from llama_index.schema import NodeRelationship
from llama_index.indices.query.query_transform import StepDecomposeQueryTransform
from llama_index.langchain_helpers.agents import (
IndexToolConfig,
Expand Down Expand Up @@ -59,10 +58,12 @@
ResponseSynthesizer,
load_index_from_storage,
)

from llama_index.schema import TextNode
from llama_index.storage.docstore.types import RefDocInfo
from llama_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR

from llama_index.composability import ComposableGraph
from llama_index.schema import BaseDocument

from models.embed_statics_model import EmbedStatics
from models.openai_model import Models
Expand Down Expand Up @@ -874,33 +875,25 @@ async def load_index(

async def index_to_docs(
self, old_index, chunk_size: int = 4000, chunk_overlap: int = 200
) -> List[BaseDocument]:
) -> List[Document]:
documents = []
docstore = old_index.docstore
ref_docs = old_index.ref_doc_info

for doc_id in docstore.docs.keys():
for document in ref_docs.values():
text = ""

document = docstore.get_document(doc_id)
if document is not None:
node = docstore.get_node(document.get_doc_id())
while node is not None:
extra_info = node.extra_info
text += f"{node.text} "
next_node_id = node.relationships.get(
DocumentRelationship.NEXT, None
)
node = docstore.get_node(next_node_id) if next_node_id else None
for node in document.node_ids:
node = docstore.get_node(node)
text += f"{node.text} "

text_splitter = TokenTextSplitter(
separator=" ", chunk_size=chunk_size, chunk_overlap=chunk_overlap
)
text_chunks = text_splitter.split_text(text)

for chunk_text in text_chunks:
new_doc = Document(text=chunk_text, extra_info=extra_info)
new_doc = Document(text=chunk_text, extra_info=document.metadata)
documents.append(new_doc)
print(new_doc)

return documents

Expand Down Expand Up @@ -1283,7 +1276,7 @@ async def load_data(
channel_id, limit=limit, oldest_first=oldest_first
)
results.append(
Document(channel_content, extra_info={"channel_name": channel_name})
Document(text=channel_content, extra_info={"channel_name": channel_name})
)
return results

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ dependencies = [
"sqlitedict==2.1.0",
"backoff==2.2.1",
"flask==2.2.3",
"llama-index==0.6.30",
"llama-index==0.6.38",
"pypdf==3.11.1",
"youtube_transcript_api==0.5.0",
"sentencepiece==0.1.99",
"protobuf==3.20.2",
"python-pptx==0.6.21",
"langchain==0.0.208",
"langchain==0.0.268",
"unidecode==1.3.6",
"tqdm==4.64.1",
"docx2txt==0.8",
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.6.30
llama-index==0.6.38
pypdf==3.11.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.99
protobuf==3.20.2
python-pptx==0.6.21
sentence-transformers==2.2.2
langchain==0.0.208
langchain==0.0.268
openai-whisper
unidecode==1.3.6
tqdm==4.64.1
Expand Down
4 changes: 2 additions & 2 deletions requirements_base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.6.30
llama-index==0.6.38
pypdf==3.11.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.99
protobuf==3.20.2
python-pptx==0.6.21
langchain==0.0.208
langchain==0.0.268
unidecode==1.3.6
tqdm==4.64.1
docx2txt==0.8
Expand Down
Loading