
Commit

Merge pull request #52 from DARPA-ASKEM/4790-bug-app-fails-to-extract-configurations-from-timeseries-datasets-1

4790 bug app fails to extract configurations from timeseries datasets 1
dgauldie authored Sep 17, 2024
2 parents b5e9a5e + a82b6c7 commit eb7617d
Showing 3 changed files with 21 additions and 12 deletions.
2 changes: 1 addition & 1 deletion gollm/entities.py
@@ -11,8 +11,8 @@ class ConfigureModelDocument(BaseModel):

class ConfigureModelDataset(BaseModel):
    dataset: List[str]
-    matrix: str
    amr: str  # expects AMR in a stringified JSON object
+    matrix: str = None


class ModelCardModel(BaseModel):
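A minimal sketch of how the updated schema might be used, assuming gollm is importable as a package; the dataset rows, AMR payload, and matrix text below are hypothetical placeholders:

from gollm.entities import ConfigureModelDataset

# Timeseries dataset with no contact matrix: matrix now defaults to None.
config = ConfigureModelDataset(
    dataset=["t,S,I,R", "0,990,10,0", "1,985,14,1"],  # hypothetical CSV rows
    amr='{"header": {"name": "SIR"}}',                # AMR as a stringified JSON object
)

# A matrix can still be supplied explicitly when one is available.
config_with_matrix = ConfigureModelDataset(
    dataset=["t,S,I,R", "0,990,10,0"],
    amr='{"header": {"name": "SIR"}}',
    matrix="age_group,0-17,18-64,65+",                # hypothetical matrix text
)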
13 changes: 9 additions & 4 deletions gollm/openai/prompts/model_meta_compare.py
@@ -1,13 +1,18 @@
MODEL_METADATA_COMPARE_PROMPT = """
You are a helpful agent designed to compare multiple AMR models.
-Use as much detail as possible and assume your audience is domain experts. When you mention bias and limitations, provide detailed examples. Do not repeat the model card schema headers. Do not refer to 'gollmCard' in your response, refer to 'gollmCard metadata' as 'metadata'. Format the response in Markdown and include section headers.
-If all the AMR models contain gollmCard metadata, focus solely on comparing gollmCard information.
-If some but not all of the AMR models contain gollmCard metadata, compare headers, gollmCard, and semantic information together.
-If none of the AMR models contain gollmCard metadata, only focus on comparing headers and semantic information. Avoid making assumptions about the AMR models to maintain an objective perspective.
+Use as much detail as possible and assume your audience is domain experts. Use the following to decide how to compare the AMR models:
+- If all the AMR models contain metadata, focus solely on comparing metadata information.
+- If some but not all of the AMR models contain metadata, compare both metadata and semantic information together.
+- If none of the AMR models contain metadata, only focus on comparing semantic information.
+Avoid making assumptions about the AMR models to maintain an objective perspective.
+If you mention bias and limitations, provide detailed examples.
+Do not repeat the metadata schema headers.
+Do not use 'gollmCard' in your response, refer to it as 'metadata'.
+Format the response in Markdown and include section headers.
AMRs:
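The calling code is not part of this diff; as a rough sketch only, the revised prompt might be combined with serialized AMRs and sent to the chat API along these lines (the model name, message structure, and helper function are assumptions):

import json
from typing import List

from openai import OpenAI

from gollm.openai.prompts.model_meta_compare import MODEL_METADATA_COMPARE_PROMPT


def compare_models(amrs: List[dict]) -> str:
    # Append each AMR (stringified JSON) after the prompt's "AMRs:" section.
    prompt = MODEL_METADATA_COMPARE_PROMPT + "\n\n".join(json.dumps(amr) for amr in amrs)
    client = OpenAI()
    output = client.chat.completions.create(
        model="gpt-4o",  # assumed model name
        messages=[{"role": "user", "content": prompt}],
    )
    return output.choices[0].message.content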
18 changes: 11 additions & 7 deletions gollm/openai/tool_utils.py
@@ -106,7 +106,7 @@ def amr_enrichment_chain(amr: str, research_paper: str) -> dict:
    return postprocess_oai_json(output.choices[0].message.content)


-def model_card_chain(amr: str = None, research_paper: str = None) -> dict:
+def model_card_chain(amr: str, research_paper: str = None) -> dict:
    print("Creating model card...")
    assert amr, "An AMR model must be provided."
    if not research_paper:
@@ -199,7 +199,7 @@ def embedding_chain(text: str) -> List:
    return output.data[0].embedding


-def model_config_from_dataset(amr: str, dataset: List[str]) -> str:
+def model_config_from_dataset(amr: str, dataset: List[str], matrix: str) -> str:
    print("Extracting datasets...")
    dataset_text = os.linesep.join(dataset)

@@ -211,11 +211,15 @@ def model_config_from_dataset(amr: str, dataset: List[str]) -> str:

    print("Building prompt to extract model configurations from a dataset...")
    prompt = (CONFIGURE_FROM_DATASET_PROMPT
-             + CONFIGURE_FROM_DATASET_MAPPING_PROMPT
-             + CONFIGURE_FROM_DATASET_TIMESERIES_PROMPT
-             + CONFIGURE_FROM_DATASET_AMR_PROMPT.format(amr=amr)
-             + CONFIGURE_FROM_DATASET_DATASET_PROMPT.format(data=dataset_text)
-             + "Answer:")
+             + CONFIGURE_FROM_DATASET_MAPPING_PROMPT
+             + CONFIGURE_FROM_DATASET_TIMESERIES_PROMPT
+             + CONFIGURE_FROM_DATASET_AMR_PROMPT.format(amr=amr)
+             + CONFIGURE_FROM_DATASET_DATASET_PROMPT.format(data=dataset_text))
+
+    if matrix:
+        prompt += CONFIGURE_FROM_DATASET_MATRIX_PROMPT.format(matrix=matrix)
+
+    prompt += "Answer:"

    print("Sending request to OpenAI API...")
    client = OpenAI()
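A calling sketch for the new signature; only the argument shape comes from the diff, while the import path, dataset rows, and matrix text are placeholders:

from gollm.openai.tool_utils import model_config_from_dataset

amr = '{"header": {"name": "SIR"}}'                        # stringified AMR JSON (placeholder)
dataset = ["timestamp,S,I,R", "0,990,10,0", "1,985,14,1"]  # hypothetical timeseries rows

# Timeseries dataset with no matrix: the matrix-specific prompt section is skipped.
config = model_config_from_dataset(amr=amr, dataset=dataset, matrix=None)

# With a matrix, CONFIGURE_FROM_DATASET_MATRIX_PROMPT is appended before "Answer:".
config_with_matrix = model_config_from_dataset(
    amr=amr,
    dataset=dataset,
    matrix="age_group,0-17,18-64,65+",                     # hypothetical matrix text
)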
