From da05927d630f1f03b505bdf05888d2e9582a53d2 Mon Sep 17 00:00:00 2001
From: you-n-g
Date: Thu, 27 Jun 2024 18:15:31 +0800
Subject: [PATCH] Fix model bug and push (#35)

---
 .../model_implementation/benchmark/eval.py    | 11 ++++++-
 .../model_implementation/one_shot/__init__.py |  4 ++-
 rdagent/model_implementation/task.py          | 33 ++++++++++++++-----
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/rdagent/model_implementation/benchmark/eval.py b/rdagent/model_implementation/benchmark/eval.py
index 394b6f7e..7d9289be 100644
--- a/rdagent/model_implementation/benchmark/eval.py
+++ b/rdagent/model_implementation/benchmark/eval.py
@@ -19,7 +19,16 @@ class ModelImpValEval:
 
     Assumption:
     - If the model structure is similar, the output will change in similar way when we change the input.
-    - we try to initialize the model param in similar value. So only the model structure is different.
+
+    Challenge:
+    - The key difference from implementing factors is that model layers have parameters (factor operators typically have no parameters, or their parameters are given).
+    - We try to initialize the model parameters with similar values, so that only the model structure differs.
+
+    We compare the correlations of the following sequences:
+    - modelA[init1](input1).hidden_out1, modelA[init1](input2).hidden_out1, ...
+    - modelB[init1](input1).hidden_out1, modelB[init1](input2).hidden_out1, ...
+
+    For each hidden output we can calculate a correlation; the average correlation over all hidden outputs is the metric.
     """
 
     def evaluate(self, gt: ModelTaskImpl, gen: ModelTaskImpl):
diff --git a/rdagent/model_implementation/one_shot/__init__.py b/rdagent/model_implementation/one_shot/__init__.py
index 357f8c03..2e02fdb9 100644
--- a/rdagent/model_implementation/one_shot/__init__.py
+++ b/rdagent/model_implementation/one_shot/__init__.py
@@ -1,3 +1,4 @@
+import re
 from typing import Sequence
 
 from rdagent.oai.llm_utils import APIBackend
@@ -36,7 +37,8 @@ def generate(self, task_l: Sequence[ModelImplTask]) -> Sequence[ModelTaskImpl]:
             )
 
             # Extract the code part from the response
-            code = resp.split("```python")[1].split("```")[0]
+            match = re.search(r".*```[Pp]ython\n(.*)\n```.*", resp, re.DOTALL)
+            code = match.group(1) if match is not None else resp  # fall back to the raw response when no fenced code block is found
             mti.inject_code(**{"model.py": code})
             mti_l.append(mti)
         return mti_l
diff --git a/rdagent/model_implementation/task.py b/rdagent/model_implementation/task.py
index 13efba40..0a69aee4 100644
--- a/rdagent/model_implementation/task.py
+++ b/rdagent/model_implementation/task.py
@@ -41,18 +41,35 @@ def __init__(self, json_uri: str) -> None:
 
     def load(self, *argT, **kwargs) -> Sequence[ModelImplTask]:
         # TODO: we should load the tasks from json;
+        # NOTE: this version does not align with the ground-truth answer
+        # formula_info = {
+        #     "name": "Anti-Symmetric Deep Graph Network (A-DGN)",
+        #     "description": "A framework for stable and non-dissipative DGN design. It ensures long-range information preservation between nodes and prevents gradient vanishing or explosion during training.",
+        #     "formulation": "x_u^{(l)} = x_u^{(l-1)} + \\epsilon \\sigma \\left( W^T x_u^{(l-1)} + \\Phi(X^{(l-1)}, N_u) + b \\right)",
+        #     "variables": {
+        #         "x_u^{(l)}": "The state of node u at layer l",
+        #         "\\epsilon": "The step size in the Euler discretization",
+        #         "\\sigma": "A monotonically non-decreasing activation function",
+        #         "W": "An anti-symmetric weight matrix",
+        #         "X^{(l-1)}": "The node feature matrix at layer l-1",
+        #         "N_u": "The set of neighbors of node u",
+        #         "b": "A bias vector",
+        #     },
+        #     "key": "A-DGN",
+        # }
         formula_info = {
             "name": "Anti-Symmetric Deep Graph Network (A-DGN)",
             "description": "A framework for stable and non-dissipative DGN design. It ensures long-range information preservation between nodes and prevents gradient vanishing or explosion during training.",
-            "formulation": "x_u^{(l)} = x_u^{(l-1)} + \\epsilon \\sigma \\left( W^T x_u^{(l-1)} + \\Phi(X^{(l-1)}, N_u) + b \\right)",
+            "formulation": r"\mathbf{x}^{\prime}_i = \mathbf{x}_i + \epsilon \cdot \sigma \left( (\mathbf{W}-\mathbf{W}^T-\gamma \mathbf{I}) \mathbf{x}_i + \Phi(\mathbf{X}, \mathcal{N}_i) + \mathbf{b}\right),",
             "variables": {
-                "x_u^{(l)}": "The state of node u at layer l",
-                "\\epsilon": "The step size in the Euler discretization",
-                "\\sigma": "A monotonically non-decreasing activation function",
-                "W": "An anti-symmetric weight matrix",
-                "X^{(l-1)}": "The node feature matrix at layer l-1",
-                "N_u": "The set of neighbors of node u",
-                "b": "A bias vector",
+                r"\mathbf{x}_i": "The state of node i at the previous layer",
+                r"\epsilon": "The step size in the Euler discretization",
+                r"\sigma": "A monotonically non-decreasing activation function",
+                r"\Phi": "A graph convolutional operator",
+                r"\mathbf{W}": "A weight matrix (the update uses its anti-symmetric part W - W^T)",
+                r"\mathbf{x}^{\prime}_i": "The updated state of node i at the current layer",
+                r"\mathcal{N}_i": "The set of neighbors of node i",
+                r"\mathbf{b}": "A bias vector",
             },
             "key": "A-DGN",
         }
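
Note on the evaluation metric: the eval.py docstring above describes comparing, per hidden output, the correlation between the ground-truth model and the generated model over a set of shared random inputs. A minimal sketch of that computation follows; it assumes both models return a (prediction, hidden_output) pair and share a similar parameter initialization, which is an illustrative simplification rather than the actual ModelTaskImpl interface:

import torch

def hidden_output_correlation(model_a, model_b, n_inputs=16, in_dim=8):
    """Average per-dimension correlation between the hidden outputs of two models."""
    torch.manual_seed(0)
    hs_a, hs_b = [], []
    for _ in range(n_inputs):
        x = torch.randn(4, in_dim)       # the same random input is fed to both models
        _, h_a = model_a(x)              # hidden output of the ground-truth model
        _, h_b = model_b(x)              # hidden output of the generated model
        hs_a.append(h_a.flatten())
        hs_b.append(h_b.flatten())
    seq_a = torch.stack(hs_a)            # (n_inputs, hidden_dim): one sequence per hidden dimension
    seq_b = torch.stack(hs_b)
    corrs = []
    for d in range(seq_a.shape[1]):      # one correlation per hidden dimension
        c = torch.corrcoef(torch.stack([seq_a[:, d], seq_b[:, d]]))[0, 1]
        if not torch.isnan(c):           # skip dimensions that are constant across inputs
            corrs.append(c)
    return torch.stack(corrs).mean().item()  # the average correlation is the metric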
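
Note on the corrected formulation: the new "formulation" string is the A-DGN layer update, x'_i = x_i + eps * sigma((W - W^T - gamma*I) x_i + Phi(X, N_i) + b). A minimal sketch of one such update step in plain PyTorch; using tanh for sigma and mean aggregation over neighbors for Phi are illustrative assumptions, since A-DGN only requires a monotonically non-decreasing activation and some graph convolutional operator:

import torch

def adgn_update(x, adj, W, b, epsilon=0.1, gamma=0.1):
    """One A-DGN (Euler) step. x: (n, d) node states, adj: (n, n) float adjacency,
    W: (d, d) weight matrix, b: (d,) bias."""
    d = W.shape[0]
    anti = W - W.T - gamma * torch.eye(d)                  # the term (W - W^T - gamma*I)
    deg = adj.sum(dim=1, keepdim=True).clamp(min=1.0)      # node degrees (avoid division by zero)
    phi = (adj @ x) / deg                                  # Phi: mean aggregation over the neighbors N_i
    return x + epsilon * torch.tanh(x @ anti.T + phi + b)  # Euler discretization with step size epsilon

For example, adgn_update(torch.randn(5, 4), torch.ones(5, 5), torch.randn(4, 4), torch.zeros(4)) applies one step on a fully connected 5-node graph with 4-dimensional node states.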