From b140715f514dc868ad839b471c55ae987b96ec42 Mon Sep 17 00:00:00 2001 From: Anjie Yang Date: Wed, 23 Oct 2024 19:02:25 +0800 Subject: [PATCH] Add Camel examples --- .gitignore | 6 +- crab/agents/backend_models/__init__.py | 1 + crab/agents/backend_models/camel_rag_model.py | 108 +++++++++++++ examples/{camel_basic.py => camel_example.py} | 52 ++----- examples/camel_rag_example.py | 146 ++++++++++++++++++ 5 files changed, 277 insertions(+), 36 deletions(-) create mode 100644 crab/agents/backend_models/camel_rag_model.py rename examples/{camel_basic.py => camel_example.py} (61%) create mode 100644 examples/camel_rag_example.py diff --git a/.gitignore b/.gitignore index 6900f8f..61f5df3 100644 --- a/.gitignore +++ b/.gitignore @@ -170,4 +170,8 @@ _build/ logs/ -.DS_Store \ No newline at end of file +.DS_Store + +# RAG data +local_data/ +vim_docs/ \ No newline at end of file diff --git a/crab/agents/backend_models/__init__.py b/crab/agents/backend_models/__init__.py index 5f36882..9175cf1 100644 --- a/crab/agents/backend_models/__init__.py +++ b/crab/agents/backend_models/__init__.py @@ -13,6 +13,7 @@ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F401 from .camel_model import CamelModel +from .camel_rag_model import CamelRAGModel from .claude_model import ClaudeModel from .gemini_model import GeminiModel from .openai_model import OpenAIModel diff --git a/crab/agents/backend_models/camel_rag_model.py b/crab/agents/backend_models/camel_rag_model.py new file mode 100644 index 0000000..99222bc --- /dev/null +++ b/crab/agents/backend_models/camel_rag_model.py @@ -0,0 +1,108 @@ +# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
+from typing import Any, List, Optional, Tuple
+
+from crab import BackendOutput, MessageType
+from crab.agents.backend_models.camel_model import CamelModel
+from camel.messages import BaseMessage
+
+try:
+    from camel.embeddings import OpenAIEmbedding
+    from camel.retrievers import VectorRetriever
+    from camel.storages import QdrantStorage
+    from camel.types import EmbeddingModelType
+    RAG_ENABLED = True
+except ImportError:
+    RAG_ENABLED = False
+
+
+class CamelRAGModel(CamelModel):
+    """CamelModel backend that adds retrieved document context to each chat.
+
+    Documents are indexed with :meth:`process_documents`. On every
+    :meth:`chat` call the first non-image message is used as the retrieval
+    query, and any retrieved passages are passed to the parent model as one
+    extra text message placed before the original messages.
+    """
+
+    def __init__(
+        self,
+        model: str,
+        model_platform: str,
+        parameters: dict[str, Any] | None = None,
+        history_messages_len: int = 0,
+        embedding_model: Optional[str] = "text-embedding-3-small",
+        collection_name: str = "knowledge_base",
+        vector_storage_path: str = "local_data",
+        top_k: int = 3,
+        similarity_threshold: float = 0.75,
+    ) -> None:
+        """Create the chat backend and, optionally, its retrieval components.
+
+        Args:
+            model: Forwarded unchanged to ``CamelModel``.
+            model_platform: Forwarded unchanged to ``CamelModel``.
+            parameters: Forwarded unchanged to ``CamelModel``.
+            history_messages_len: Forwarded unchanged to ``CamelModel``.
+            embedding_model: Name of the OpenAI embedding model to use. A
+                falsy value (``None`` or ``""``) disables retrieval: no
+                embedding model, storage or retriever is created.
+            collection_name: Collection name handed to ``QdrantStorage``.
+            vector_storage_path: ``path`` handed to ``QdrantStorage``.
+            top_k: ``top_k`` passed to every retriever query.
+            similarity_threshold: ``similarity_threshold`` passed to every
+                retriever query.
+
+        Raises:
+            ImportError: If the optional camel RAG packages failed to import.
+            ValueError: If ``embedding_model`` is not a value of
+                ``EmbeddingModelType``.
+        """
+        if not RAG_ENABLED:
+            raise ImportError(
+                "Please install RAG dependencies: "
+                "pip install camel-ai[embeddings,retrievers,storages]"
+            )
+
+        super().__init__(model, model_platform, parameters, history_messages_len)
+
+        self.top_k = top_k
+        self.similarity_threshold = similarity_threshold
+
+        if not embedding_model:
+            # Retrieval disabled: chat() then behaves like plain CamelModel.
+            self.embedding_model = None
+            self.vector_storage = None
+            self.retriever = None
+            return
+
+        # Honour the requested model name. Previously the argument was only
+        # truth-tested and the library default embedding was always used.
+        self.embedding_model = OpenAIEmbedding(
+            model_type=EmbeddingModelType(embedding_model)
+        )
+        self.vector_storage = QdrantStorage(
+            vector_dim=self.embedding_model.get_output_dim(),
+            path=vector_storage_path,
+            collection_name=collection_name,
+        )
+        # NOTE(review): the retriever is built without the storage, while
+        # process_documents() passes the storage to process() and query() is
+        # called without it. Confirm against the installed camel version that
+        # queries read from the same store that documents are written to.
+        self.retriever = VectorRetriever(
+            embedding_model=self.embedding_model
+        )
+
+    def process_documents(self, content_path: str) -> None:
+        """Index the content found at ``content_path`` for later retrieval.
+
+        Args:
+            content_path: Passed as ``content`` to the retriever's
+                ``process`` call together with this model's vector storage.
+
+        Raises:
+            ValueError: If the retriever or the vector storage is missing
+                (retrieval was disabled at construction time).
+        """
+        if not self.retriever or not self.vector_storage:
+            raise ValueError("RAG components not initialized")
+
+        self.retriever.process(
+            content=content_path,
+            storage=self.vector_storage,
+        )
+
+    def _enhance_with_context(
+        self, messages: List[Tuple[str, MessageType]]
+    ) -> List[Tuple[str, MessageType]]:
+        """Return ``messages`` preceded by a retrieved-context text message.
+
+        The input list is returned unchanged when retrieval is disabled, when
+        there is no non-empty non-image message to use as a query, or when
+        the retriever reports nothing suitable.
+
+        Args:
+            messages: ``(content, message_type)`` pairs of the current turn.
+
+        Returns:
+            Either ``messages`` itself or a new list whose first element is
+            the context message followed by all original messages.
+        """
+        if not self.retriever or not self.vector_storage:
+            return messages
+
+        # The first message that is not a base64 JPEG serves as the query.
+        query = next(
+            (msg[0] for msg in messages if msg[1] != MessageType.IMAGE_JPG_BASE64),
+            ""
+        )
+        if not query:
+            # Nothing textual to search for; do not send an empty query to
+            # the embedding backend.
+            return messages
+
+        retrieved_info = self.retriever.query(
+            query=query,
+            top_k=self.top_k,
+            similarity_threshold=self.similarity_threshold,
+        )
+
+        # The code treats a first hit whose text starts with
+        # "No suitable information" as the retriever's "nothing found" marker.
+        if not retrieved_info or retrieved_info[0].get('text', '').startswith('No suitable information'):
+            return messages
+
+        passages = "".join(
+            f"From {info.get('content path', 'unknown')}:\n"
+            f"{info.get('text', '')}\n\n"
+            for info in retrieved_info
+        )
+        context = "Relevant context:\n\n" + passages
+
+        return [(context, MessageType.TEXT), *messages]
+
+    def chat(self, messages: List[Tuple[str, MessageType]]) -> BackendOutput:
+        """Chat through ``CamelModel.chat`` after adding retrieved context.
+
+        Args:
+            messages: ``(content, message_type)`` pairs of the current turn.
+
+        Returns:
+            Whatever the parent ``chat`` returns for the enhanced messages.
+        """
+        enhanced_messages = self._enhance_with_context(messages)
+        return super().chat(enhanced_messages)
diff --git a/examples/camel_basic.py b/examples/camel_example.py
similarity index 61%
rename from examples/camel_basic.py
rename to examples/camel_example.py
index 2f184f8..37e49ca 100644
--- a/examples/camel_basic.py
+++ b/examples/camel_example.py
@@ -12,35 +12,15 @@
 # limitations under the License.
 # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
=========== from termcolor import colored - -from camel.societies import RolePlaying -from camel.utils import print_text_animated +import os from crab import Benchmark, create_benchmark -from crab.agents.backend_models import OpenAIModel +from crab.agents.backend_models.camel_model import CamelModel from crab.agents.policies import SingleAgentPolicy from crab.benchmarks.template import template_benchmark_config +from camel.types import ModelType, ModelPlatformType +from camel.models import ModelFactory -def camel_task_generator(): - task_prompt = "Design a custom game using pygame" - print(colored(f"Original task prompt:\n{task_prompt}\n", "yellow")) - role_play_session = RolePlaying("Computer Programmer", "Gamer", task_prompt=task_prompt) - print(colored(f"Specified task prompt:\n{role_play_session.task_prompt}\n", "cyan")) - - chat_turn_limit, n = 50, 0 - input_msg = role_play_session.init_chat() - while n < chat_turn_limit: - n += 1 - assistant_response, user_response = role_play_session.step(input_msg) - print_text_animated(colored(f"AI User:\n\n{user_response.msg.content}\n", "blue")) - print_text_animated(colored(f"AI Assistant:\n\n{assistant_response.msg.content}\n", "green")) - - if "CAMEL_TASK_DONE" in user_response.msg.content: - break - - input_msg = assistant_response.msg - - return role_play_session.task_prompt def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy): for step in range(20): @@ -74,23 +54,25 @@ def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy): print("=" * 40) print( colored( - f"Task finished, result: {response.evaluation_results}", - "green" + f"Task finished, result: {response.evaluation_results}", "green" ) ) return -if __name__ == "__main__": - task_description = camel_task_generator() +if __name__ == "__main__": benchmark = create_benchmark(template_benchmark_config) - task, action_space = benchmark.start_task("0", task_description) + task, action_space = benchmark.start_task("0") env_descriptions = 
benchmark.get_env_descriptions() - model = OpenAIModel(model="gpt-4o", history_messages_len=5) - agent = SingleAgentPolicy(model_backend=model) - agent.reset(task_description, action_space, env_descriptions) - - print("Start performing task: " + colored(f'"{task_description}"', "green")) + # TODO: Use local model + camel_model = CamelModel( + model="gpt-4o", + model_platform=ModelPlatformType.OPENAI, + parameters={"temperature": 0.7}, + ) + agent = SingleAgentPolicy(model_backend=camel_model) + agent.reset(task.description, action_space, env_descriptions) + print("Start performing task: " + colored(f'"{task.description}"', "green")) start_benchmark(benchmark, agent) - benchmark.reset() \ No newline at end of file + benchmark.reset() diff --git a/examples/camel_rag_example.py b/examples/camel_rag_example.py new file mode 100644 index 0000000..65c2865 --- /dev/null +++ b/examples/camel_rag_example.py @@ -0,0 +1,146 @@ +# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
===========
+from termcolor import colored
+import os
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urldefrag, urljoin
+
+from crab import Benchmark, create_benchmark
+from crab.agents.backend_models.camel_rag_model import CamelRAGModel
+from crab.agents.policies import SingleAgentPolicy
+from crab.benchmarks.template import template_benchmark_config
+from camel.types import ModelType, ModelPlatformType
+
+# Seconds to wait for each HTTP request; without a timeout a stalled server
+# would block the example forever.
+REQUEST_TIMEOUT = 30
+
+
+def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy):
+    """Drive ``agent`` on ``benchmark`` for at most 20 steps.
+
+    Each step prints the ``template_env`` observation, asks the agent for
+    actions and executes them one by one with ``benchmark.step``. The
+    function returns as soon as a step result reports ``terminated``.
+
+    Args:
+        benchmark: Benchmark whose task has already been started.
+        agent: Policy that has already been reset for that task.
+    """
+    for step in range(20):
+        print("=" * 40)
+        print(f"Start agent step {step}:")
+        observation = benchmark.observe()["template_env"]
+        print(f"Current environment observation: {observation}")
+        actions = agent.chat(
+            {
+                "template_env": [
+                    (f"Current environment observation: {observation}", 0),
+                ]
+            }
+        )
+        print(colored(f"Agent take action: {actions}", "blue"))
+
+        for action in actions:
+            # A separate name keeps the step result from rebinding the list
+            # of actions that is being iterated.
+            step_result = benchmark.step(
+                action=action.name,
+                parameters=action.arguments,
+                env_name=action.env,
+            )
+            print(
+                colored(
+                    f'Action "{action.name}" success, stat: '
+                    f"{step_result.evaluation_results}",
+                    "green",
+                )
+            )
+            if step_result.terminated:
+                print("=" * 40)
+                print(
+                    colored(
+                        f"Task finished, result: {step_result.evaluation_results}",
+                        "green"
+                    )
+                )
+                return
+
+
+def _fetch_soup(url):
+    """Download ``url`` and return it parsed, without script/style tags.
+
+    Raises:
+        requests.RequestException: On connection problems, timeouts or an
+            HTTP error status.
+    """
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    # An error page must not be stored as if it were documentation.
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, 'html.parser')
+    for tag in soup(['script', 'style']):
+        tag.decompose()
+    return soup
+
+
+def _save_page(content_dir, name, url, text):
+    """Write ``text`` to ``<content_dir>/<name>.txt`` and return that path."""
+    filename = os.path.join(content_dir, f"{name}.txt")
+    with open(filename, 'w', encoding='utf-8') as f:
+        f.write(f"Source: {url}\n\n{text}")
+    return filename
+
+
+def prepare_vim_docs():
+    """Prepare Vim documentation for RAG.
+
+    Downloads the seed page and every page it links to with a relative URL,
+    and stores the text of each page as a ``.txt`` file in ``vim_docs``.
+    Links that differ only by their ``#fragment`` are fetched once.
+
+    Returns:
+        Paths of all files written, starting with the seed page's
+        ``main.txt``.
+
+    Raises:
+        requests.RequestException: If the seed page cannot be downloaded.
+            Failures on linked pages are printed and skipped.
+    """
+    print(colored("Starting Vim documentation preparation...", "yellow"))
+    base_url = "https://vimdoc.sourceforge.net/htmldoc/usr_07.html"
+    content_dir = "vim_docs"
+    os.makedirs(content_dir, exist_ok=True)
+
+    print(colored("Fetching main page...", "yellow"))
+    soup = _fetch_soup(base_url)
+
+    # Process the main page first. Its file is part of the returned list so
+    # that the caller indexes it as well.
+    main_content = soup.get_text(separator='\n', strip=True)
+    processed_files = [_save_page(content_dir, "main", base_url, main_content)]
+
+    # Collect each linked page once; the seed page itself counts as seen.
+    seen_urls = {base_url}
+    pages = []
+    for link in soup.find_all('a'):
+        href = link.get('href')
+        if not href or href.startswith(('#', 'http')):
+            continue
+        page_href = urldefrag(href).url
+        full_url = urljoin(base_url, page_href)
+        if full_url in seen_urls:
+            continue
+        seen_urls.add(full_url)
+        pages.append((page_href, full_url))
+
+    total_links = len(pages)
+    print(colored(f"Found {total_links} documentation pages to process", "yellow"))
+
+    for idx, (href, full_url) in enumerate(pages, 1):
+        try:
+            print(colored(f"Processing page {idx}/{total_links}: {href}", "yellow"))
+
+            # Fetch and process page
+            page_soup = _fetch_soup(full_url)
+            content = page_soup.get_text(separator='\n', strip=True)
+
+            # Save content
+            filename = _save_page(
+                content_dir, href.replace('/', '_'), full_url, content
+            )
+            processed_files.append(filename)
+            print(colored(f"✓ Saved {href}", "green"))
+
+        except Exception as e:
+            # Best effort: one broken page must not abort the whole crawl.
+            print(colored(f"✗ Error processing {full_url}: {e}", "red"))
+
+    print(colored("Documentation preparation completed!", "green"))
+    return processed_files
+
+
+if __name__ == "__main__":
+    print(colored("=== Starting RAG-enhanced benchmark ===", "cyan"))
+
+    # Initialize benchmark and environment
+    print(colored("\nInitializing benchmark environment...", "yellow"))
+    benchmark = create_benchmark(template_benchmark_config)
+    task, action_space = benchmark.start_task("0")
+    env_descriptions = benchmark.get_env_descriptions()
+
+    # Download the documentation that the RAG model will index.
+    doc_files = prepare_vim_docs()
+
+    print(colored("\nInitializing RAG model...", "yellow"))
+    rag_model = CamelRAGModel(
+        model="gpt-4o",
+        model_platform=ModelPlatformType.OPENAI,
+        parameters={"temperature": 0.7}
+    )
+
+    # Index every downloaded file before the agent starts chatting.
+    print(colored("Processing documents for RAG...", "yellow"))
+    for doc_file in doc_files:
+        print(colored(f"Processing {doc_file}...", "yellow"))
+        rag_model.process_documents(doc_file)
+    print(colored("RAG model initialization complete!", "green"))
+
+    print(colored("\nSetting up agent...", "yellow"))
+    agent = SingleAgentPolicy(model_backend=rag_model)
+    agent.reset(task.description, action_space, env_descriptions)
+
+    print(colored("\nStarting benchmark execution:", "cyan"))
+    print("Start performing task: " + colored(f'"{task.description}"', "green"))
+    start_benchmark(benchmark, agent)
+    benchmark.reset()