Skip to content

Commit

Permalink
fix: user-agent for scrape
Browse files Browse the repository at this point in the history
  • Loading branch information
glorenzo972 committed Jul 1, 2024
1 parent 9c505d5 commit b0cd3ce
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
*Andrea Sponziello*
### **Copyright**: *Tiledesk SRL*

## [2024-07-01]
### 0.2.5
- fix: user-agent for scrape

## [2024-07-01]
### 0.2.4
- fix: scrape_type=0
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ pc.create_index(const.PINECONE_INDEX,
```

## Models
Models for /api/ask

### OpenAI - engine: openai
- gpt-3.5-turbo
- gpt-4
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tilellm"
version = "0.2.4"
version = "0.2.5"
description = "tiledesk for RAG"
authors = ["Gianluca Lorenzo <[email protected]>"]
repository = "https://github.com/Tiledesk/tiledesk-llm"
Expand Down Expand Up @@ -32,7 +32,7 @@ pinecone-client = "^4.1.1"
python-dotenv = "^1.0.1"
langchain_community = "0.2.x"
tiktoken = "0.7.x"
beautifulsoup4 ="^4.12.3"
beautifulsoup4 = "^4.12.3"
#uvicorn = "^0.28"
unstructured= "0.14.x"
#playwright = "^1.43.0"
Expand Down
10 changes: 8 additions & 2 deletions tilellm/models/item_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pydantic import BaseModel, Field, field_validator, ValidationError
from typing import Dict, Optional, List
from typing import Dict, Optional, List, Union
import datetime


Expand Down Expand Up @@ -76,9 +76,15 @@ def top_k_range(cls, v):
return v


# Pydantic model grouping the AWS values a request must supply: an access key
# id, its secret access key, and a region name. All three fields are required
# strings (no defaults are declared).
# NOTE(review): the secret is held as a plain `str`, so it may show up in
# reprs/logs of this model — confirm whether that is acceptable.
class AWSAuthentication(BaseModel):
# Identifier half of the AWS credential pair.
aws_access_key_id: str
# Secret half of the AWS credential pair.
aws_secret_access_key: str
# AWS region name; presumably something like "eu-central-1" — TODO confirm.
region_name: str


class QuestionToLLM(BaseModel):
question: str
llm_key: str
llm_key: Union[str, AWSAuthentication]
llm: str
model: str = Field(default="gpt-3.5-turbo")
temperature: float = Field(default=0.0)
Expand Down
33 changes: 33 additions & 0 deletions tilellm/shared/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import logging

import langchain_aws
from langchain_voyageai import VoyageAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from tilellm.shared import const
Expand Down Expand Up @@ -119,6 +120,38 @@ async def wrapper(question, *args, **kwargs):
max_tokens=question.max_tokens
)

elif question.llm == "aws":
import os

os.environ["AWS_SECRET_ACCESS_KEY"] = question.llm_key.aws_secret_access_key
os.environ["AWS_ACCESS_KEY_ID"] = question.llm_key.aws_access_key_id

# chat_model = ChatBedrock(model_id=question.model,
# model_kwargs={"temperature": question.temperature,"max_tokens":question.max_tokens },
# region_name="eu-central-1"
# )

import boto3
#session = boto3.Session(
#   aws_access_key_id=question.llm_key.aws_access_key_id,
# aws_secret_access_key=question.llm_key.aws_secret_access_key,
# region_name=question.llm_key.region_name
# )



chat_model = ChatBedrockConverse(
model=question.model,
temperature=question.temperature,
max_tokens=question.max_tokens,
region_name=question.llm_key.region_name

# base_url="http://bedroc-proxy-paacejvmzcgv-121947512.eu-central-1.elb.amazonaws.com/api/v1/",

) # model_kwargs={"temperature": 0.001},

#print(chat_model.session)

else:
chat_model = ChatOpenAI(api_key=question.llm_key,
model=question.model,
Expand Down
6 changes: 4 additions & 2 deletions tilellm/tools/document_tool_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ def get_content_by_url(url: str, scrape_type: int):
urls = [url]
if scrape_type == 0:
loader = UnstructuredURLLoader(
urls=urls, mode="elements", strategy="fast", continue_on_failure=False
urls=urls, mode="elements", strategy="fast", continue_on_failure=False,
headers={'user-agent': 'Mozilla/5.0'}
)
else:
loader = UnstructuredURLLoader(
urls=urls, mode="single", continue_on_failure=False
urls=urls, mode="single", continue_on_failure=False,
headers={'user-agent': 'Mozilla/5.0'}
)
docs = loader.load()

Expand Down

0 comments on commit b0cd3ce

Please sign in to comment.