Skip to content

Commit

Permalink
amr enrichment
Browse files Browse the repository at this point in the history
  • Loading branch information
j2whiting committed Aug 14, 2024
1 parent d9b8b5a commit 752a368
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 1 deletion.
25 changes: 25 additions & 0 deletions gollm/openai/prompts/amr_enrichment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
ENRICH_PROMPT = """
You are a helpful agent designed to extract metadata associated with petrinet models. \
You will focus on extracting descriptions and units for each initial place and transition in the model.
For context:
In a Petri net model, initials represent the initial state of the system through the initial distribution of tokens across the places, known as the initial marking. Each place corresponds to a variable or state in the system, such as a species concentration in a reaction, and the number of tokens reflects the initial conditions of the ODEs.
Parameters define the system's rules and rates of evolution, including transition rates (analogous to reaction rates in ODEs) that determine how quickly tokens move between places. These parameters also include stoichiometric relationships, represented by the weights of arcs connecting places and transitions, dictating how many tokens are consumed or produced when a transition occurs.
Your initials and parameters to extract are: {param_initial_dict}
Extract descriptions and units from the following research paper: {paper_text}\n###PAPER END###\n
Please provide your output in the following json format:
{{'initials': {{'place1': {{'description': '...', 'units': '...'}}, 'place2': {{'description': '...', 'units': '...'}}, ...}}, 'parameters': {{'transition1': {{'description': '...', 'units': '...'}}, 'transition2': {{'description': '...', 'units': '...'}}, ...}}}}
Ensure that units are provided in both a unicode string and mathml format like so:
"units": {{ "expression": "1/(person*day)", "expression_mathml": "<apply><divide/><cn>1</cn><apply><times/><ci>person</ci><ci>day</ci></apply></apply>" }}
Where 'placeN' and 'transitionN' are the names of the intials and parameters to extract as found in the provided dictionary.
Begin:
"""
27 changes: 26 additions & 1 deletion gollm/openai/tool_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
exceeds_tokens,
model_config_adapter,
postprocess_oai_json,
parse_param_initials,
)
from gollm.openai.prompts.petrinet_config import PETRINET_PROMPT
from gollm.openai.prompts.model_card import MODEL_CARD_TEMPLATE, INSTRUCTIONS
from gollm.openai.prompts.condense import CONDENSE_PROMPT, format_chunks
from gollm.openai.prompts.dataset_config import DATASET_PROMPT
from gollm.openai.prompts.model_meta_compare import MODEL_METADATA_COMPARE_PROMPT
from gollm.openai.prompts.general_instruction import GENERAL_INSTRUCTION_PROMPT
from gollm.openai.prompts.amr_enrichment import ENRICH_PROMPT
from gollm.openai.react import OpenAIAgent, AgentExecutor, ReActManager
from gollm.openai.toolsets import DatasetConfig

Expand All @@ -29,7 +31,6 @@ def escape_curly_braces(text: str):

def model_config_chain(research_paper: str, amr: str) -> dict:
print("Reading model config from research paper: {}".format(research_paper[:100]))
research_paper = remove_references(research_paper)
research_paper = normalize_greek_alphabet(research_paper)

# probonto ontology file copied from https://github.com/gyorilab/mira/blob/e468059089681c7cd457acc51821b5bd1074df04/mira/dkg/resources/probonto.json
Expand Down Expand Up @@ -59,6 +60,30 @@ def model_config_chain(research_paper: str, amr: str) -> dict:
config = postprocess_oai_json(output.choices[0].message.content)
return model_config_adapter(config)


def amr_enrichment_chain(amr: str, research_paper:str) -> dict:
amr_param_states = parse_param_initials(amr)
prompt = ENRICH_PROMPT.format(
param_initial_dict=amr_param_states,
paper_text=escape_curly_braces(research_paper)
)
client = OpenAI()
output = client.chat.completions.create(
model="gpt-4o-2024-05-13",
max_tokens=4000,
top_p=1,
frequency_penalty=0,
presence_penalty=0,
seed=123,
temperature=0,
response_format={"type": "json_object"},
messages=[
{"role": "user", "content": prompt},
],
)
return postprocess_oai_json(output.choices[0].message.content)


def model_card_chain(research_paper: str = None, amr: str = None) -> dict:
print("Creating model card...")
assert research_paper or amr, "Either research_paper or amr must be provided."
Expand Down
22 changes: 22 additions & 0 deletions gollm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,28 @@ def remove_references(text: str) -> str:
new_text = re.sub(pattern, "", text)
return new_text.strip()

def c(amr: dict):
try:
ode = amr['semantics']['ode']
except KeyError:
raise KeyError("ODE semantics not found in AMR, please provide a valide AMR with structure semantics.ode")

assert 'parameters' in ode, "No parameters found in ODE semantics, please provide a valid AMR with structure semtnatics.ode.parameters"
assert 'initials' in ode, "No initials found in ODE semantics, please provide a valid AMR with structure semantics.ode.initials"

params = ode['parameters']

assert all(['id' in p.keys() for p in params]), "All parameters must have an 'id' key"

param_ids = [p['id'] for p in params if p is not None and p.get('id')]

initials = ode['initials']

assert all(['target' in i.keys() for i in initials]), "All initials must have an 'id' key"

initial_ids = [i['target'] for i in initials if i is not None and i.get('target')]

return {'initial_names': initial_ids, 'param_names': param_ids}

def parse_json_from_markdown(text):
print("Stripping markdown...")
Expand Down

0 comments on commit 752a368

Please sign in to comment.