Skip to content

Commit

Permalink
Fix model bug and push (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
you-n-g authored Jun 27, 2024
1 parent 8be82fa commit da05927
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 10 deletions.
11 changes: 10 additions & 1 deletion rdagent/model_implementation/benchmark/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,16 @@ class ModelImpValEval:
Assumption:
- If the model structure is similar, the output will change in similar way when we change the input.
- we try to initialize the model param in similar value. So only the model structure is different.
Challenge:
- The key difference between it and implementing factors is that we have parameters in the layers (Factor operators often have no parameters or are given parameters).
Comparing the correlation of following sequences
- modelA[init1](input1).hidden_out1, modelA[init1](input2).hidden_out1, ...
- modelB[init1](input1).hidden_out1, modelB[init1](input2).hidden_out1, ...
For each hidden output, we can calculate a correlation. The average correlation will be the metric.
"""

def evaluate(self, gt: ModelTaskImpl, gen: ModelTaskImpl):
Expand Down
4 changes: 3 additions & 1 deletion rdagent/model_implementation/one_shot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from typing import Sequence
from rdagent.oai.llm_utils import APIBackend

Expand Down Expand Up @@ -36,7 +37,8 @@ def generate(self, task_l: Sequence[ModelImplTask]) -> Sequence[ModelTaskImpl]:
)

# Extract the code part from the response
code = resp.split("```python")[1].split("```")[0]
match = re.search(r".*```[Pp]ython\n(.*)\n```.*", resp, re.DOTALL)
code = match.group(1)
mti.inject_code(**{"model.py": code})
mti_l.append(mti)
return mti_l
33 changes: 25 additions & 8 deletions rdagent/model_implementation/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,35 @@ def __init__(self, json_uri: str) -> None:
def load(self, *argT, **kwargs) -> Sequence[ModelImplTask]:
# TODO: we should load the tasks from json;

# this version does not align with the right answer
# formula_info = {
# "name": "Anti-Symmetric Deep Graph Network (A-DGN)",
# "description": "A framework for stable and non-dissipative DGN design. It ensures long-range information preservation between nodes and prevents gradient vanishing or explosion during training.",
# "formulation": "x_u^{(l)} = x_u^{(l-1)} + \\epsilon \\sigma \\left( W^T x_u^{(l-1)} + \\Phi(X^{(l-1)}, N_u) + b \\right)",
# "variables": {
# "x_u^{(l)}": "The state of node u at layer l",
# "\\epsilon": "The step size in the Euler discretization",
# "\\sigma": "A monotonically non-decreasing activation function",
# "W": "An anti-symmetric weight matrix",
# "X^{(l-1)}": "The node feature matrix at layer l-1",
# "N_u": "The set of neighbors of node u",
# "b": "A bias vector",
# },
# "key": "A-DGN",
# }
formula_info = {
"name": "Anti-Symmetric Deep Graph Network (A-DGN)",
"description": "A framework for stable and non-dissipative DGN design. It ensures long-range information preservation between nodes and prevents gradient vanishing or explosion during training.",
"formulation": "x_u^{(l)} = x_u^{(l-1)} + \\epsilon \\sigma \\left( W^T x_u^{(l-1)} + \\Phi(X^{(l-1)}, N_u) + b \\right)",
"formulation": r"\mathbf{x}^{\prime}_i = \mathbf{x}_i + \epsilon \cdot \sigma \left( (\mathbf{W}-\mathbf{W}^T-\gamma \mathbf{I}) \mathbf{x}_i + \Phi(\mathbf{X}, \mathcal{N}_i) + \mathbf{b}\right),",
"variables": {
"x_u^{(l)}": "The state of node u at layer l",
"\\epsilon": "The step size in the Euler discretization",
"\\sigma": "A monotonically non-decreasing activation function",
"W": "An anti-symmetric weight matrix",
"X^{(l-1)}": "The node feature matrix at layer l-1",
"N_u": "The set of neighbors of node u",
"b": "A bias vector",
r"\mathbf{x}_i": "The state of node i at previous layer",
r"\epsilon": "The step size in the Euler discretization",
r"\sigma": "A monotonically non-decreasing activation function",
r"\Phi": "A graph convolutional operator",
r"W": "An anti-symmetric weight matrix",
r"\mathbf{x}^{\prime}_i": "The node feature matrix at layer l-1",
r"\mathcal{N}_i": "The set of neighbors of node u",
r"\mathbf{b}": "A bias vector",
},
"key": "A-DGN",
}
Expand Down

0 comments on commit da05927

Please sign in to comment.