From c36c1864572483cc161d8d947b78faa695ba2d9b Mon Sep 17 00:00:00 2001 From: glorenzo972 Date: Sat, 14 Sep 2024 11:25:45 +0200 Subject: [PATCH] modified default value for scrape type: 4 --- .gitignore | 3 +++ CHANGELOG.md | 5 +++++ pyproject.toml | 2 +- tilellm/models/item_model.py | 4 ++-- tilellm/store/pinecone/pinecone_repository_serverless.py | 2 +- worker/package.json | 2 +- 6 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 17b2a6f..0450b64 100644 --- a/.gitignore +++ b/.gitignore @@ -113,6 +113,9 @@ celerybeat.pid .venv .environ .environ.prod +deploy_old.sh +docker-compose-test.yml +docker-compose.yml env/ venv/ ENV/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b5ea79..9ea7b38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ *Andrea Sponziello* ### **Copyrigth**: *Tiledesk SRL* +## [2024-09-14] +### 0.2.18 +- upgrade: worker +- modify: default value for scrape type: 4 + ## [2024-09-05] ### 0.2.17 - fix: nltk download on Dockerfile diff --git a/pyproject.toml b/pyproject.toml index c6db9d5..416ba3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "tilellm" -version = "0.2.17" +version = "0.2.18" description = "tiledesk for RAG" authors = ["Gianluca Lorenzo "] repository = "https://github.com/Tiledesk/tiledesk-llm" diff --git a/tilellm/models/item_model.py b/tilellm/models/item_model.py index 60a98f5..f435e92 100644 --- a/tilellm/models/item_model.py +++ b/tilellm/models/item_model.py @@ -10,8 +10,8 @@ class ParametersScrapeType4(BaseModel): tags_to_extract: Optional[List[str]] = Field(default_factory=list) unwanted_classnames: Optional[List[str]] = Field(default_factory=list) desired_classnames: Optional[List[str]] = Field(default_factory=list) - remove_lines: Optional[bool] = Field(default=False) - remove_comments: Optional[bool] = Field(default=False) + remove_lines: Optional[bool] = Field(default=True) + remove_comments: Optional[bool] = Field(default=True) 
@model_validator(mode='after') def check_booleans(cls, values): diff --git a/tilellm/store/pinecone/pinecone_repository_serverless.py b/tilellm/store/pinecone/pinecone_repository_serverless.py index 9636441..7cda5bb 100644 --- a/tilellm/store/pinecone/pinecone_repository_serverless.py +++ b/tilellm/store/pinecone/pinecone_repository_serverless.py @@ -135,7 +135,7 @@ async def add_pc_item(self, item, embedding_obj=None, embedding_dimension=None): else: metadata = MetadataItem(id=metadata_id, source=source, type=type_source, embedding=embedding) - document = Document(page_content=content, metadata=metadata.dict()) + document = Document(page_content=content, metadata=metadata.model_dump()) #tolto dict() chunks.extend(self.chunk_data(data=[document], chunk_size=chunk_size, chunk_overlap=chunk_overlap)) total_tokens, cost = self.calc_embedding_cost(chunks, embedding) diff --git a/worker/package.json b/worker/package.json index 32d21e3..666ed8c 100644 --- a/worker/package.json +++ b/worker/package.json @@ -9,7 +9,7 @@ "author": "", "license": "ISC", "dependencies": { - "@tiledesk/tiledesk-train-jobworker": "^0.0.23", + "@tiledesk/tiledesk-train-jobworker": "^0.0.24", "dotenv": "^16.4.5", "express": "^4.19.2" }