Commit

update and clean the scripts but not finished

lwaekfjlk committed Oct 9, 2024
1 parent 8693e2c commit 558e031
Showing 7 changed files with 171 additions and 283 deletions.
40 changes: 0 additions & 40 deletions research_bench/metrics.py → research_bench/proposal_eval.py
@@ -12,16 +12,6 @@


def compute_bleu(reference: str, hypothesis: str) -> float:
"""
Computes the BLEU score between reference and hypothesis texts.
Args:
reference (str): Reference text.
hypothesis (str): Hypothesis text.
Returns:
float: BLEU score.
"""
try:
reference_tokens = nltk.word_tokenize(reference.lower())
hypothesis_tokens = nltk.word_tokenize(hypothesis.lower())
@@ -36,16 +26,6 @@ def compute_bleu(reference: str, hypothesis: str) -> float:


def compute_rouge_l(reference: str, hypothesis: str) -> float:
"""
Computes the ROUGE-L score between reference and hypothesis texts.
Args:
reference (str): Reference text.
hypothesis (str): Hypothesis text.
Returns:
float: ROUGE-L F1 score.
"""
try:
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
scores = scorer.score(reference, hypothesis)
@@ -57,16 +37,6 @@ def compute_rouge_l(reference: str, hypothesis: str) -> float:


def compute_bertscore(reference: str, hypothesis: str) -> float:
"""
Computes the BERTScore between reference and hypothesis texts.
Args:
reference (str): Reference text.
hypothesis (str): Hypothesis text.
Returns:
float: BERTScore F1 score.
"""
try:
# Compute BERTScore
P, R, F1 = score(
@@ -79,16 +49,6 @@ def compute_bertscore(reference: str, hypothesis: str) -> float:


def compute_gpt_metric(current_5q: str, proposal_5q: str) -> Optional[float]:
"""
Computes a custom GPT-based metric to evaluate if the proposal_5q reflects the current_5q.
Args:
current_5q (str): The current five core questions.
proposal_5q (str): The proposed five core questions.
Returns:
Optional[float]: A similarity score between 0 and 1.
"""
try:
prompt = [
{
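For context, a minimal sketch of the two lexical metrics exposed by the renamed proposal_eval.py, reconstructed from the hunks above; the smoothing choice and the broad exception fallback are assumptions, not necessarily the repository's exact code.

import nltk
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from rouge_score import rouge_scorer


def compute_bleu(reference: str, hypothesis: str) -> float:
    try:
        reference_tokens = nltk.word_tokenize(reference.lower())
        hypothesis_tokens = nltk.word_tokenize(hypothesis.lower())
        # Smoothing avoids zero scores on short texts (assumed choice).
        smoothing = SmoothingFunction().method1
        return float(
            sentence_bleu([reference_tokens], hypothesis_tokens, smoothing_function=smoothing)
        )
    except Exception:
        return 0.0


def compute_rouge_l(reference: str, hypothesis: str) -> float:
    try:
        # ROUGE-L with stemming, matching the hunk above.
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        scores = scorer.score(reference, hypothesis)
        return float(scores['rougeL'].fmeasure)
    except Exception:
        return 0.0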
136 changes: 136 additions & 0 deletions research_bench/proposal_writing.py
@@ -0,0 +1,136 @@
import logging
import os
from typing import List, Optional

from research_town.agents import AgentManager
from research_town.configs import Config
from research_town.dbs import LogDB, PaperDB, ProfileDB, ProgressDB
from research_town.envs import ProposalWritingEnv
from research_town.utils.model_prompting import model_prompting

logger = logging.getLogger(__name__)


def write_proposal_researchtown(
authors: List[str], intros: List[str], keyword: str, id: int
) -> Optional[str]:
"""
Generates a comprehensive research proposal based on the provided authors and existing proposals
using the ProposalWritingEnv environment.
Args:
authors (List[str]): List of author names.
intros (List[str]): List of existing introduction texts.
Returns:
Optional[str]: Generated proposal as a string if successful, else None.
"""

config = Config('../configs')
if os.path.exists(f'./profile_dbs/profile_{id}'):
profile_db = ProfileDB(load_file_path=f'./profile_dbs/profile_{id}')
else:
profile_db = ProfileDB()
profile_db.pull_profiles(names=authors, config=config)
profile_db.save_to_json(f'./profile_dbs/profile_{id}')

# Initialize other databases using default instances
log_db = LogDB()
progress_db = ProgressDB()
    paper_db = PaperDB()
    paper_db.pull_papers(num=3, domain=keyword)  # seed with a few papers from the keyword domain
# Initialize ProposalWritingEnv with the required databases and configuration
agent_manager = AgentManager(config=config, profile_db=profile_db)
env = ProposalWritingEnv(
name='proposal_writing',
log_db=log_db,
progress_db=progress_db,
paper_db=paper_db,
config=config,
agent_manager=agent_manager,
)
logger.info('Initialized ProposalWritingEnv.')

    # Create a leader agent from the first author's profile
    leader_profile = profile_db.get(name=authors[0])[0]
    if not leader_profile:
        logger.error('No valid leader profile found.')
        return None
    logger.debug('Leader profile: %s', leader_profile)
    leader = agent_manager.create_agent(leader_profile, role='leader')
    logger.info('Created leader agent for profile.')

    # Provide the paper introductions as context for the environment
env.on_enter(
leader=leader,
contexts=intros,
)
    logger.info('Entered ProposalWritingEnv with provided introductions as context.')

# Run the environment to generate the proposal
run_result = env.run()
    if run_result is not None:
        # Exhaust the generator of (progress, agent) pairs so the environment
        # runs to completion; the intermediate results are unused here.
        for _progress, _agent in run_result:
            pass
logger.info('Ran ProposalWritingEnv.')

# Exit the environment and retrieve the generated proposal
exit_status, exit_dict = env.on_exit()
proposal = exit_dict.get('proposal')
if proposal and proposal.content:
logger.info('Successfully generated proposal.')
return str(proposal.content)
else:
logger.warning('Proposal generation returned no content.')
return None


def write_proposal_baseline(intro: str, model: str = 'gpt-4o-mini') -> Optional[str]:
"""
Generates the five core research questions based on the introduction text using an LLM.
Args:
intro (str): Introduction text of the paper.
Returns:
Optional[str]: Generated five core questions as a string.
"""
try:
prompt = [
{
'role': 'user',
'content': (
                    'Here is a high-level summarized insight of a research field: Machine Learning.\n\n'
'Here are the five core questions:\n\n'
'[Question 1] - What is the problem?\n\n'
'Formulate the specific research question you aim to address. Only output one question and do not include any more information.\n\n'
'[Question 2] - Why is it interesting and important?\n\n'
'Explain the broader implications of solving this problem for the research community.\n'
'Discuss how such paper will affect the future research.\n'
'Discuss how addressing this question could advance knowledge or lead to practical applications.\n\n'
'[Question 3] - Why is it hard?\n\n'
'Discuss the challenges and complexities involved in solving this problem.\n'
'Explain why naive or straightforward approaches may fail.\n'
'Identify any technical, theoretical, or practical obstacles that need to be overcome. MAKE IT CLEAR.\n\n'
"[Question 4] - Why hasn't it been solved before?\n\n"
'Identify gaps or limitations in previous research or existing solutions.\n'
'Discuss any barriers that have prevented this problem from being solved until now.\n'
'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
'[Question 5] - What are the key components of my approach and results?\n\n'
'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
f'Introduction:\n{intro}\n\n'
'Please provide the five core questions contents based on the above introduction.'
),
}
]
response = model_prompting(model, prompt, mode='TEST')
        if response and response[0]:
return response[0]
else:
            print('Received empty response from model_prompting.')
return None
except Exception as e:
        print(f'Error generating proposal: {e}')
return None
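A minimal usage sketch for the two entry points this file adds; the author name, introduction text, and keyword are placeholders, and both calls assume a configured ../configs directory plus model credentials.

from proposal_writing import write_proposal_baseline, write_proposal_researchtown

# Placeholder inputs for illustration only.
intro = 'We study multi-agent simulation of research communities...'

# Single-agent baseline: one LLM call over the introduction.
baseline = write_proposal_baseline(intro, model='gpt-4o-mini')

# Multi-agent variant: pulls author profiles and related papers first.
proposal = write_proposal_researchtown(
    authors=['Ada Lovelace'],  # placeholder author name
    intros=[intro],
    keyword='machine learning',
    id=0,
)
print(baseline, proposal)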
33 changes: 33 additions & 0 deletions research_bench/run.py
@@ -0,0 +1,33 @@
import argparse
from proposal_writing import write_proposal_researchtown


def main(args: argparse.Namespace) -> None:
    # TODO: not finished -- wire up the proposal-writing pipeline.
    return


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--input', type=str, required=True, help='Path to the input JSON file.'
)
parser.add_argument(
'--output', type=str, required=True, help='Path to the output JSONL file.'
)
parser.add_argument(
'--intro_log',
type=str,
required=False,
help='Path to the introduction log JSONL file.',
)
parser.add_argument(
'--model_name',
type=str,
required=False,
        default='gpt-4o-mini',
help='Model name for the single agent test.',
)
args = parser.parse_args()

main(args)
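main() is still a stub, so here is one plausible wiring consistent with the flags above, assuming --input holds a JSON list of records with authors, intros, and keyword fields; that schema is a guess, not part of this commit.

import argparse
import json

from proposal_writing import write_proposal_researchtown


def main(args: argparse.Namespace) -> None:
    # Assumed input schema: [{'authors': [...], 'intros': [...], 'keyword': '...'}, ...]
    with open(args.input, 'r') as f:
        papers = json.load(f)
    with open(args.output, 'w') as f:
        for idx, paper in enumerate(papers):
            proposal = write_proposal_researchtown(
                authors=paper['authors'],
                intros=paper['intros'],
                keyword=paper['keyword'],
                id=idx,
            )
            f.write(json.dumps({'id': idx, 'proposal': proposal}) + '\n')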
File renamed without changes.
76 changes: 1 addition & 75 deletions research_bench/run_evaluation.py
@@ -4,7 +4,7 @@
import os
from typing import Any, Dict, List, Optional

-from metrics import compute_bertscore, compute_bleu, compute_gpt_metric, compute_rouge_l
+from research_bench.proposal_eval import compute_bertscore, compute_bleu, compute_gpt_metric, compute_rouge_l
from tqdm import tqdm
from utils import get_current_5q, single_agent_proposal_writing

@@ -23,80 +23,6 @@
logger = logging.getLogger(__name__)


def get_proposal_5q(
authors: List[str], intros: List[str], keyword: str, id: int
) -> Optional[str]:
"""
Generates a comprehensive research proposal based on the provided authors and existing proposals
using the ProposalWritingEnv environment.
Args:
authors (List[str]): List of author names.
intros (List[str]): List of existing introduction texts.
Returns:
Optional[str]: Generated proposal as a string if successful, else None.
"""

config = Config('../configs')
if os.path.exists(f'./profile_dbs/profile_{id}'):
profile_db = ProfileDB(load_file_path=f'./profile_dbs/profile_{id}')
else:
profile_db = ProfileDB()
profile_db.pull_profiles(names=authors, config=config)
profile_db.save_to_json(f'./profile_dbs/profile_{id}')

# Initialize other databases using default instances
log_db = LogDB()
progress_db = ProgressDB()
paper_db = PaperDB() # Assuming existing papers are handled elsewhere
paper_db.pull_papers(num=3, domain=keyword)
# Initialize ProposalWritingEnv with the required databases and configuration
agent_manager = AgentManager(config=config, profile_db=profile_db)
env = ProposalWritingEnv(
name='proposal_writing',
log_db=log_db,
progress_db=progress_db,
paper_db=paper_db,
config=config,
agent_manager=agent_manager,
)
logger.info('Initialized ProposalWritingEnv.')

# Create a leader agent (assuming `create_leader` requires a profile)
leader_profile = profile_db.get(name=authors[0])[0]
print('leader_profile', leader_profile)
leader = agent_manager.create_agent(leader_profile, role='leader')
if not leader_profile:
logger.error('No valid leader profile found.')
return None
logger.info('Created leader agent for profile')

# Prepare the context from existing proposals
# Assuming that the context should be a list of proposal strings
env.on_enter(
leader=leader,
contexts=intros,
)
logger.info('Entered ProposalWritingEnv with provided proposals as context.')

# Run the environment to generate the proposal
run_result = env.run()
if run_result is not None:
for progress, agent in run_result:
# Process progress and agent if needed
pass
logger.info('Ran ProposalWritingEnv.')

# Exit the environment and retrieve the generated proposal
exit_status, exit_dict = env.on_exit()
proposal = exit_dict.get('proposal')
if proposal and proposal.content:
logger.info('Successfully generated proposal.')
return str(proposal.content)
else:
logger.warning('Proposal generation returned no content.')
return None


def process_paper(
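With the import switched to research_bench.proposal_eval, a minimal per-paper scoring sketch; the flat dict of four scores is an assumed aggregation, not the repository's exact shape.

from typing import Dict, Optional

from research_bench.proposal_eval import (
    compute_bertscore,
    compute_bleu,
    compute_gpt_metric,
    compute_rouge_l,
)


def score_pair(reference_5q: str, generated_5q: str) -> Dict[str, Optional[float]]:
    # Combine the lexical, embedding, and LLM-judged metrics for one paper.
    return {
        'bleu': compute_bleu(reference_5q, generated_5q),
        'rouge_l': compute_rouge_l(reference_5q, generated_5q),
        'bertscore': compute_bertscore(reference_5q, generated_5q),
        'gpt': compute_gpt_metric(reference_5q, generated_5q),
    }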
52 changes: 0 additions & 52 deletions research_bench/run_evaluation.sh

This file was deleted.

