Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to use local LLMs and filter sensitive information #20

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions create_har.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,26 @@
import json
from playwright.async_api import async_playwright

def filter_sensitive_info(request):
    """Remove credential-bearing headers from ``request.headers``.

    Header names are compared case-insensitively, because HTTP header names
    are case-insensitive and Playwright documents ``Request.headers`` as a
    dict whose names are lower-cased.

    Two header container shapes are accepted:

    * a mapping of ``name -> value`` (the shape Playwright exposes), and
    * a list of ``{"name": ..., "value": ...}`` dicts (the HAR-style shape
      the original implementation assumed).

    Args:
        request: Object exposing a ``headers`` attribute in one of the two
            shapes above.

    Returns:
        None. The filtered headers are assigned back to ``request.headers``.
    """
    sensitive_headers = {"authorization", "cookie"}

    headers = request.headers
    if isinstance(headers, dict):
        filtered_headers = {
            name: value
            for name, value in headers.items()
            if name.lower() not in sensitive_headers
        }
    else:
        filtered_headers = [
            header
            for header in headers
            if header["name"].lower() not in sensitive_headers
        ]

    # NOTE(review): Playwright's Request.headers looks like a read-only
    # property, and changing a request object from an event listener is not
    # documented to alter what gets written to the recorded HAR. Confirm this
    # handler has the intended effect when attached via context.on("request").
    request.headers = filtered_headers

async def open_browser_and_wait():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=False)

context = await browser.new_context(
record_har_path="network_requests.har", # Path to save the HAR file
record_har_path="filtered_network_requests.har", # Path to save the filtered HAR file
record_har_content="embed", # Omit content to make the HAR file smaller
# TODO record_har_url_filter="*", # Optional URL filter
)

page = await context.new_page()

context.on("request", filter_sensitive_info)

print(
"Browser is open. Press Enter in the terminal when you're ready to close the browser and save cookies..."
)
Expand All @@ -23,7 +30,7 @@ async def open_browser_and_wait():

cookies = await context.cookies()

with open("cookies.json", "w") as f:
with open("filtered_cookies.json", "w") as f:
json.dump(cookies, f, indent=4)

await context.close()
Expand Down
9 changes: 8 additions & 1 deletion integuru/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,14 @@
default=False,
help="Whether to generate the full integration code",
)
@click.option(
"--use-local-llm",
is_flag=True,
default=False,
help="Whether to use a local LLM instead of sending data to OpenAI",
)
def cli(
model, prompt, har_path, cookie_path, max_steps, input_variables, generate_code
model, prompt, har_path, cookie_path, max_steps, input_variables, generate_code, use_local_llm
):
input_vars = dict(input_variables)
asyncio.run(
Expand All @@ -52,6 +58,7 @@ def cli(
input_variables=input_vars,
max_steps=max_steps,
to_generate_code=generate_code,
use_local_llm=use_local_llm,
)
)

Expand Down
12 changes: 10 additions & 2 deletions integuru/util/LLM.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ class LLMSingleton:
_instance = None
_default_model = "gpt-4o"
_alternate_model = "o1-preview"
_local_model = None

@classmethod
def get_instance(cls, model: str = None):
def get_instance(cls, model: str = None, use_local: bool = False):
if use_local and cls._local_model:
return cls._local_model

if model is None:
model = cls._default_model

Expand Down Expand Up @@ -34,5 +38,9 @@ def switch_to_alternate_model(cls):

return cls._instance

llm = LLMSingleton()
@classmethod
def set_local_model(cls, local_model_instance):
    """Set a local model instance to use instead of OpenAI.

    The instance is stored on the class attribute ``_local_model``, so it is
    shared by every user of the class rather than tied to one object.
    ``get_instance(use_local=True)`` returns this stored object when it is
    truthy.
    """
    # Stored on the class, not on an instance.
    cls._local_model = local_model_instance

llm = LLMSingleton()
30 changes: 30 additions & 0 deletions integuru/util/har_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,40 @@
"plausible",
)

# Keywords that mark a header name or request body as sensitive. They are
# matched as case-insensitive substrings, so e.g. "Key" also matches
# "X-Api-Key" and "Auth" matches "Proxy-Authorization".
sensitive_keywords = (
    "Authorization",
    "Token",
    "Auth",
    "Password",
    "Secret",
    "Key",
    "Credential",
    "Session",
    "Bearer",
)


def filter_sensitive_info(request: Dict[str, Any]) -> Dict[str, Any]:
    """
    Filters out sensitive information from the request headers and body.

    Headers whose *name* contains any of ``sensitive_keywords``
    (case-insensitive substring match) are dropped. Header values are not
    inspected. If the body text in ``postData["text"]`` contains any keyword,
    the whole text is replaced with ``"[FILTERED]"``.

    The header container keeps its shape. A HAR-style list of
    ``{"name": ..., "value": ...}`` entries stays a list, and a plain
    ``name -> value`` dict stays a dict. A missing ``headers`` key yields an
    empty dict, as before.

    Args:
        request: Request dictionary (presumably a HAR ``request`` object;
            verify against callers). It is modified in place.

    Returns:
        The same ``request`` object, after filtering.
    """
    # Lower-case the keywords once per call rather than once per comparison.
    lowered_keywords = tuple(keyword.lower() for keyword in sensitive_keywords)

    def is_sensitive(text: Any) -> bool:
        # Non-string input (e.g. a missing name or a None body) is never sensitive.
        if not isinstance(text, str):
            return False
        lowered_text = text.lower()
        return any(keyword in lowered_text for keyword in lowered_keywords)

    headers = request.get("headers", {})
    if isinstance(headers, dict):
        request["headers"] = {
            name: value for name, value in headers.items() if not is_sensitive(name)
        }
    elif headers is None:
        # An explicit null is treated as "no headers" instead of crashing.
        request["headers"] = {}
    else:
        # HAR 1.2 stores headers as a list of {"name", "value"} objects.
        # Entries that are not dicts are kept untouched.
        request["headers"] = [
            entry
            for entry in headers
            if not (isinstance(entry, dict) and is_sensitive(entry.get("name", "")))
        ]

    post_data = request.get("postData")
    if isinstance(post_data, dict) and is_sensitive(post_data.get("text", "")):
        post_data["text"] = "[FILTERED]"

    return request

def format_request(har_request: Dict[str, Any]) -> Request:
"""
Formats a HAR request into a Request object.
"""
har_request = filter_sensitive_info(har_request)
method = har_request.get("method", "GET")
url = har_request.get("url", "")

Expand Down