+
+
+
+
+
+
-
@@ -140,13 +145,10 @@
:asset="selectedModel"
:is-visible="showSaveModelModal"
@close-modal="onCloseModelModal"
- @on-save="onAddModel"
/>
diff --git a/packages/client/hmi-client/src/components/workflow/ops/model/tera-model-node.vue b/packages/client/hmi-client/src/components/workflow/ops/model/tera-model-node.vue
index b6a549c734..06219dc9c6 100644
--- a/packages/client/hmi-client/src/components/workflow/ops/model/tera-model-node.vue
+++ b/packages/client/hmi-client/src/components/workflow/ops/model/tera-model-node.vue
@@ -12,6 +12,7 @@
@@ -33,7 +34,7 @@
diff --git a/packages/client/hmi-client/src/services/charts.ts b/packages/client/hmi-client/src/services/charts.ts
index a4661f427a..31c3a21a2e 100644
--- a/packages/client/hmi-client/src/services/charts.ts
+++ b/packages/client/hmi-client/src/services/charts.ts
@@ -2,7 +2,7 @@ import { percentile } from '@/utils/math';
import { isEmpty, pick } from 'lodash';
import { VisualizationSpec } from 'vega-embed';
import { v4 as uuidv4 } from 'uuid';
-import { ChartAnnotation, Intervention } from '@/types/Types';
+import { ChartAnnotation } from '@/types/Types';
import { flattenInterventionData } from './intervention-policy';
const VEGALITE_SCHEMA = 'https://vega.github.io/schema/vega-lite/v5.json';
@@ -730,8 +730,10 @@ export function createSuccessCriteriaChart(
};
}
-export function createInterventionChartMarkers(interventions: Intervention[], hideLabels = false): any[] {
- const data = flattenInterventionData(interventions);
+export function createInterventionChartMarkers(
+ data: ReturnType<typeof flattenInterventionData>,
+ hideLabels = false
+): any[] {
const markerSpec = {
data: { values: data },
mark: { type: 'rule', strokeDash: [4, 4], color: 'black' },
@@ -763,8 +765,10 @@ interface InterventionChartOptions extends Omit {
hideLabels?: boolean;
}
-export function createInterventionChart(interventions: Intervention[], chartOptions: InterventionChartOptions) {
- const interventionsData = flattenInterventionData(interventions);
+export function createInterventionChart(
+ interventions: ReturnType<typeof flattenInterventionData>,
+ chartOptions: InterventionChartOptions
+) {
const titleObj = chartOptions.title
? {
text: chartOptions.title,
@@ -783,14 +787,14 @@ export function createInterventionChart(interventions: Intervention[], chartOpti
},
layer: []
};
- if (!isEmpty(interventionsData)) {
+ if (!isEmpty(interventions)) {
// markers
createInterventionChartMarkers(interventions, chartOptions.hideLabels).forEach((marker) => {
spec.layer.push(marker);
});
// chart
spec.layer.push({
- data: { values: interventionsData },
+ data: { values: interventions },
mark: 'point',
encoding: {
x: { field: 'time', type: 'quantitative', title: chartOptions.xAxisTitle },
diff --git a/packages/client/hmi-client/src/services/workflow.ts b/packages/client/hmi-client/src/services/workflow.ts
index 54cbd3feca..3387fd1428 100644
--- a/packages/client/hmi-client/src/services/workflow.ts
+++ b/packages/client/hmi-client/src/services/workflow.ts
@@ -807,10 +807,16 @@ export function getActiveOutput(node: WorkflowNode) {
return node.outputs.find((o) => o.id === node.active);
}
-export function updateOutputPort(node: WorkflowNode, updatedOutputPort: WorkflowOutput) {
- let outputPort = node.outputs.find((port) => port.id === updatedOutputPort.id);
- if (!outputPort) return;
- outputPort = Object.assign(outputPort, updatedOutputPort);
+/**
+ * Update the output of a node referenced by the output id
+ * @param node
+ * @param updatedOutput
+ */
+export function updateOutput(node: WorkflowNode, updatedOutput: WorkflowOutput) {
+ const foundOutput = node.outputs.find((output) => output.id === updatedOutput.id);
+ if (foundOutput) {
+ Object.assign(foundOutput, updatedOutput);
+ }
}
// Check if the current-state matches that of the output-state.
diff --git a/packages/client/hmi-client/src/types/Types.ts b/packages/client/hmi-client/src/types/Types.ts
index 620e9cb3b7..fea998711c 100644
--- a/packages/client/hmi-client/src/types/Types.ts
+++ b/packages/client/hmi-client/src/types/Types.ts
@@ -196,7 +196,11 @@ export interface DocumentAsset extends TerariumAsset {
text?: string;
grounding?: Grounding;
documentAbstract?: string;
+ /**
+ * @deprecated
+ */
assets?: DocumentExtraction[];
+ extractions?: ExtractedDocumentPage[];
}
export interface ExternalPublication extends TerariumAsset {
@@ -741,6 +745,13 @@ export interface DocumentExtraction {
metadata: { [index: string]: any };
}
+export interface ExtractedDocumentPage {
+ pageNumber: number;
+ text: string;
+ tables: any[];
+ equations: any[];
+}
+
export interface ModelHeader {
name: string;
description: string;
diff --git a/packages/gollm/.gitignore b/packages/gollm/.gitignore
index 7c2e11f087..acb8170ba8 100644
--- a/packages/gollm/.gitignore
+++ b/packages/gollm/.gitignore
@@ -4,7 +4,7 @@ build/
!gradle/wrapper/gradle-wrapper.jar
!**/src/main/**/build/
!**/src/test/**/build/
-gollm_task.egg-info
+gollm.egg-info
### STS ###
.apt_generated
diff --git a/packages/gollm/Dockerfile b/packages/gollm/Dockerfile
index 34f7eac25a..0ec0d112a6 100644
--- a/packages/gollm/Dockerfile
+++ b/packages/gollm/Dockerfile
@@ -13,15 +13,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
rm -rf /var/lib/apt/lists/*
# Install gollm
-COPY ./packages/gollm/gollm-version.txt /gollmVersion.txt
-RUN COMMIT_SHA="$(cat /gollmVersion.txt)" && \
- echo "Using GoLLM commit $COMMIT_SHA" && \
- wget --progress=dot:giga -O gollm.tar.gz "https://github.com/DARPA-ASKEM/GoLLM/archive/${COMMIT_SHA}.tar.gz" && \
- tar -zxvf gollm.tar.gz && \
- rm gollm.tar.gz && \
- mv GoLLM-* GoLLM
-
-WORKDIR /GoLLM
+COPY ./packages/gollm ./gollm
+WORKDIR /gollm
RUN pip install --no-cache-dir .
#^^^^^^^^^^^^^^^^^^^^
@@ -49,11 +42,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# Copy the Spring Boot fat JAR from the builder image
COPY --from=gollm_taskrunner_builder /taskrunner/build/libs/*.jar /taskrunner.jar
-# Install GoLLM
-COPY --from=gollm_taskrunner_builder /GoLLM /GoLLM
-WORKDIR /GoLLM
-RUN pip install --no-cache-dir .
-
# Install taskrunner
COPY ./packages/taskrunner/setup.py /taskrunner/setup.py
COPY ./packages/taskrunner/taskrunner.py /taskrunner/taskrunner.py
@@ -61,9 +49,8 @@ WORKDIR /taskrunner
RUN pip install --no-cache-dir -e .
# Install GoLLM tasks
-COPY ./packages/gollm /gollm_task
-
-WORKDIR /gollm_task
+COPY ./packages/gollm /gollm
+WORKDIR /gollm
RUN pip install --no-cache-dir -e .
WORKDIR /
diff --git a/packages/gollm/__init__.py b/packages/gollm/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/packages/gollm/dev.sh b/packages/gollm/dev.sh
index c5e51fdf8d..e25c167d1c 100755
--- a/packages/gollm/dev.sh
+++ b/packages/gollm/dev.sh
@@ -2,7 +2,7 @@
# ensure the volume mounted python code is using editable mode
echo "Installing python tasks"
-cd /gollm_task
+cd /gollm
pip install --no-cache-dir -e .
# run it
diff --git a/packages/gollm/entities.py b/packages/gollm/entities.py
new file mode 100644
index 0000000000..9333fb7e3b
--- /dev/null
+++ b/packages/gollm/entities.py
@@ -0,0 +1,178 @@
+import inspect
+from datetime import datetime
+from pydantic import BaseModel, root_validator
+from typing import List, Callable, Type
+
+
+class ConfigureModelDocument(BaseModel):
+ research_paper: str
+ amr: str # expects AMR in a stringified JSON object
+
+
+class InterventionsFromDocument(BaseModel):
+ research_paper: str
+ amr: str # expects AMR in a stringified JSON object
+
+
+class ConfigureModelDataset(BaseModel):
+ dataset: List[str]
+ amr: str # expects AMR in a stringified JSON object
+ matrix: str = None
+
+
+class ModelCardModel(BaseModel):
+ amr: str # expects AMR in a stringified JSON object
+ research_paper: str = None
+
+
+class ModelCompareModel(BaseModel):
+ amrs: List[str] # expects AMRs to be a stringified JSON object
+
+
+class EquationsFromImage(BaseModel):
+ image: str # expects a base64 encoded image
+
+
+class EmbeddingModel(BaseModel):
+ text: str
+ embedding_model: str
+
+ @root_validator(pre=False, skip_on_failure=True)
+ def check_embedding_model(cls, values):
+ embedding_model = values.get("embedding_model")
+ if embedding_model != "text-embedding-ada-002":
+ raise ValueError(
+ 'Invalid embedding model, must be "text-embedding-ada-002"'
+ )
+ return values
+
+
+class Message(BaseModel):
+ message_type: str
+ message_content: str
+ message_id: int = None
+ timestamp: datetime = None
+
+ @root_validator(pre=True)
+ def set_timestamp_id(cls, values):
+ timestamp = values.get("timestamp")
+ if timestamp:
+ values["timestamp"] = datetime.fromtimestamp(timestamp)
+ else:
+ values["timestamp"] = datetime.now()
+ return values
+
+ @root_validator(pre=True)
+ def set_message_id(cls, values):
+ timestamp = values.get("timestamp")
+ if timestamp:
+ values["message_id"] = int(timestamp.timestamp())
+ return values
+
+
+class Action(BaseModel):
+ message_id: int
+ action_blob: dict
+
+
+class ChatSession:
+ # create session_id from datetime now
+ session_id = int(datetime.now().timestamp())
+
+ def __init__(self, system_context: str):
+ self.system_context = system_context
+ self.conversation_history = []
+
+ def add_message(self, message: Message):
+ """
+ Add a message to the conversation history.
+ """
+ self.conversation_history.append(message)
+
+ def get_history(self):
+ """
+ Get the conversation history.
+ :return: List of tuples containing message type and content
+ """
+ return self.conversation_history
+
+
+class Tool:
+ def __init__(
+ self, name: str, args: List, description: str, func: Callable, input_type: Type
+ ):
+ self.name = name
+ self.args = args
+ self.description = description
+ self.func = func
+ self.input_type = input_type
+
+ def __call__(self, *args, **kwargs):
+ return self.func(*args, **kwargs)
+
+
+class Toolset:
+ """
+ A class for testing the toolset. Use as context manager to add tools.
+ Example usage:
+
+ with Toolset() as named_toolset:
+ named_toolset.add_tool("ask_a_human", ["human_instructions"], "Asks the end user for their input. Useful if there are no existing tools to solve your task. You can rely on the user to search the web, provide personal details, and generally provide you with up-to-date information.", _ask_a_human, str)
+ named_toolset.add_tool("get_date", ["date_format"], "Returns the current date.", _get_date, str)
+ named_toolset.add_tool("read_csv", ["file_path", "**kwargs"], "Reads a CSV file into a pandas DataFrame.", _read_csv, str)
+ """
+
+ def __init__(self):
+ self.tools = []
+
+ @property
+ def TOOLS(self):
+ return {tool.name: tool for tool in self.tools}
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ pass
+
+ def add_tool(
+ self,
+ name: str,
+ args: List[str],
+ description: str,
+ func: Callable,
+ input_type: Type,
+ ):
+ self.tools.append(Tool(name, args, description, func, input_type))
+
+ def get_tool_names(self):
+ """
+ Returns a string of tool names.
+ """
+ return "\n".join([tool.name for tool in self.tools])
+
+ def get_tool_code(self):
+ """
+ Returns a string of tool arguments. Used for zero shot ReAct
+ """
+ return "\n".join([inspect.getsource(tool.func) for tool in self.tools])
+
+ def run_tool(self, tool_name: str, tool_args):
+ """
+ Runs a tool.
+ """
+ tool = self.TOOLS[tool_name]
+ return tool(tool.input_type(tool_args))
+
+ def add_tool(
+ self,
+ name: str,
+ args: List[str],
+ description: str,
+ func: Callable,
+ input_type: Type,
+ ):
+ """
+ Adds a tool to the toolset.
+ """
+ self.tools.append(Tool(name, args, description, func, input_type))
diff --git a/packages/gollm/gollm-version.txt b/packages/gollm/gollm-version.txt
deleted file mode 100644
index ee38882cdb..0000000000
--- a/packages/gollm/gollm-version.txt
+++ /dev/null
@@ -1 +0,0 @@
-e8d89a0163a6ebfb15df66c55f42da56ad81ffac
diff --git a/packages/gollm/gollm_openai/__init__.py b/packages/gollm/gollm_openai/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/packages/gollm/gollm_openai/prompts/__init__.py b/packages/gollm/gollm_openai/prompts/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/packages/gollm/gollm_openai/prompts/amr_enrichment.py b/packages/gollm/gollm_openai/prompts/amr_enrichment.py
new file mode 100644
index 0000000000..9960097ceb
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/amr_enrichment.py
@@ -0,0 +1,25 @@
+ENRICH_PROMPT = """
+ You are a helpful agent designed to extract metadata associated with petrinet models. \
+ You will focus on extracting descriptions and units for each initial place and transition in the model.
+
+ For context:
+
+ In a Petri net model, initials represent the initial state of the system through the initial distribution of tokens across the places, known as the initial marking. Each place corresponds to a variable or state in the system, such as a species concentration in a reaction, and the number of tokens reflects the initial conditions of the ODEs.
+ Parameters define the system's rules and rates of evolution, including transition rates (analogous to reaction rates in ODEs) that determine how quickly tokens move between places. These parameters also include stoichiometric relationships, represented by the weights of arcs connecting places and transitions, dictating how many tokens are consumed or produced when a transition occurs.
+
+ Your initials and parameters to extract are: {param_initial_dict}
+
+ Extract descriptions and units from the following research paper: {paper_text}\n###PAPER END###\n
+
+ Please provide your output in the following json format:
+
+ {{'initials': {{'place1': {{'description': '...', 'units': '...'}}, 'place2': {{'description': '...', 'units': '...'}}, ...}}, 'parameters': {{'transition1': {{'description': '...', 'units': '...'}}, 'transition2': {{'description': '...', 'units': '...'}}, ...}}}}
+
+ Ensure that units are provided in both a unicode string and mathml format like so:
+
+ "units": {{ "expression": "1/(person*day)", "expression_mathml": "<math><mfrac><mn>1</mn><mrow><mi>person</mi><mi>day</mi></mrow></mfrac></math>" }}
+
+ Where 'placeN' and 'transitionN' are the names of the initials and parameters to extract as found in the provided dictionary.
+
+ Begin:
+ """
diff --git a/packages/gollm/gollm_openai/prompts/condense.py b/packages/gollm/gollm_openai/prompts/condense.py
new file mode 100644
index 0000000000..9f98c1e0ed
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/condense.py
@@ -0,0 +1,10 @@
+from typing import List
+
+
+def format_chunks(chunks: List[str]) -> str:
+ """Format a list of chunks into a single string."""
+ return "\n---END CHUNK---".join(chunks)
+
+
+CONDENSE_PROMPT = """You are a helpful agent deployed within a search engine. It is your job to extract relevant information from chunks to answer the user's query. Do your best to attribute each part of your answer to the provided chunks.
+ Given the following user query: \n\n {query} \n\n strictly use the following chunks to answer the user's query: \n\n {chunks} \n\n Answer: \n\n"""
diff --git a/packages/gollm/gollm_openai/prompts/config_from_dataset.py b/packages/gollm/gollm_openai/prompts/config_from_dataset.py
new file mode 100644
index 0000000000..8cfb81dbc2
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/config_from_dataset.py
@@ -0,0 +1,120 @@
+CONFIGURE_FROM_DATASET_PROMPT = """
+You are a helpful agent designed to create a model configuration for a given AMR model from a user-supplied CSV dataset.
+
+Create a condition for each dataset.
+
+The user-supplied dataset may be either a time-series dataset or a model-mapping dataset.
+One of your key tasks is to determine the type of dataset supplied. This can be done by examining the column headers in the first row and the values in the first column of the user-supplied CSV dataset.
+ - Model-mapping datasets have the first row and column containing labels and the rest containing numerical values. Often, the first cell in the CSV is empty.
+ - Time-series datasets usually have the first row as labels and a column representing sequential time steps. You can use the column headers to determine which column represents time steps. If the dataset does not have header information, look for columns with date strings or incrementally increasing timestamps or numbers. The other columns will represent the values of the AMR model's states.
+
+If the dataset is time-series, follow the instructions in the TIME-SERIES EXTRACTION section.
+If the dataset is model-mapping, follow the instructions in the MODEL-MAPPING EXTRACTION section.
+
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+"""
+
+CONFIGURE_FROM_DATASET_MAPPING_PROMPT = """
+---MODEL-MAPPING EXTRACTION START---
+
+Pay attention to the following example and use it to understand how to extract values from a model-mapping dataset:
+
+---EXAMPLE START---
+
+---MATRIX START---
+
+subject-controllers of f
+
+, I_1, I_2, I_3
+S_1, f_0, f_1, f_2
+S_2, f_4, f_3, f_5
+S_3, f_7, f_8, f_6
+
+---MATRIX END---
+
+---SAMPLE DATASET START---
+
+, I_1, I_2, I_3
+S_1, 38.6, 20.5, 6.1
+S_2, 20.5, 28.2, 11.5
+S_3, 6.1, 11.5, 20.0
+
+---SAMPLE DATASET END---
+
+Since the subject of f_0 is S_1 and the controller of f_0 is I_1. We want to map the value from the dataset cell S_1, I_1 to f_0 which will be 38.6.
+
+Based on this information, we do not know the initial values for I_1 and S_1. Do not misinterpret these interaction values as initials.
+
+---EXAMPLE END---
+
+If the user-supplied dataset is a model-mapping dataset, you must create a model configuration JSON object that satisfies the JSON schema specified in the response format. To do this, follow the instructions below:
+ 1. Using metadata from the AMR model and the user-supplied dataset, create values for `name` and `description`.
+ 2. Provide a long-form description for the description. Set it to an empty string if it cannot be created from the provided metadata.
+ 3. `model_id` is a UUID. If the AMR model has an id, you can use it. Otherwise, you can set it as the nil UUID "00000000-0000-0000-0000-000000000000".
+ 4. Create a parameter semantic object for each parameter specified in the AMR model ODE semantics. Do not create new parameter semantic objects if they are not included in the original AMR model. You should set parameter semantic object fields using the following rules:
+ a. `reference_id` should reference the id of the parameter.
+ b. `source` should reference the title or file name of the user-supplied dataset.
+ c. `type` should be set to "parameter".
+ d. Be sure to extract parameter values from the user-supplied dataset, and do not use the default values from the AMR model. Set the parameter `value` to the constant value and set `type` to "Constant".
+ 5. Create an initial semantic object for each initial specified in the AMR model ODE semantics. Use the default values found in the AMR model. Do not try to create new values.
+ 6. `observableSemanticList` should be an empty list.
+ 7. `inferredParameterList` should be an empty list.
+
+---MODEL-MAPPING EXTRACTION END---
+
+"""
+
+CONFIGURE_FROM_DATASET_TIMESERIES_PROMPT = """
+---TIME-SERIES EXTRACTION START---
+
+If the user-supplied dataset is a time-series dataset, you must create a model configuration JSON object that satisfies the JSON schema specified in the response format. To do this, follow the instructions below:
+ 1. Using metadata from the AMR model and the user-supplied dataset, create values for `name` and `description`.
+ 2. Provide a long-form description for the description. If it cannot be created from the provided metadata, set it to an empty string.
+ 3. `model_id` is a UUID. If the AMR model has an id, you can use it. Otherwise, you can set it as the nil UUID "00000000-0000-0000-0000-000000000000".
+ 4. Create an initial semantic object for each initial specified in the AMR model ODE semantics. Do not create new initial semantic objects if they are not included in the original AMR model. You should set initial semantic object fields using the following rules:
+ a. `target` should reference the id of the initial variable from the AMR model ODE semantics.
+ b. `source` should reference the title or file name of the user-supplied dataset.
+ c. `type` should be set to "initial".
+ d. Find the value for `expression` in the user-supplied dataset that aligns with timepoint 0 or the earliest available timepoint.
+ e. `expression_mathml` should be the value of `expression` written in MathML format.
+ 5. Create a parameter semantic object for each parameter specified in the AMR model ODE semantics. Use the default values found in the AMR model. Do not try to create new values. If the default value is a constant type, set the parameter `value` to the constant value and set `type` to "Constant". If the default value is a distribution with a maximum and minimum value, set `type` to only "Uniform" and populate the `minimum` and `maximum` fields.
+ 6. `observableSemanticList` should be an empty list.
+ 7. `inferredParameterList` should be an empty list.
+
+---TIME-SERIES EXTRACTION END---
+
+"""
+
+CONFIGURE_FROM_DATASET_DATASET_PROMPT = """
+Use the following user-supplied dataset to answer the query:
+
+---START USER-SUPPLIED CSV DATASET---
+
+{data}
+
+---END USER-SUPPLIED CSV DATASET---
+
+"""
+
+CONFIGURE_FROM_DATASET_AMR_PROMPT = """
+Use the following JSON representation of an AMR model as a reference:
+
+---START AMR MODEL JSON---
+
+{amr}
+
+---END AMR MODEL JSON---
+
+"""
+
+CONFIGURE_FROM_DATASET_MATRIX_PROMPT = """
+Use the following contact matrix as a reference for model-mapping datasets:
+
+---START MATRIX---
+
+{matrix}
+
+---END MATRIX---
+
+"""
diff --git a/packages/gollm/gollm_openai/prompts/config_from_document.py b/packages/gollm/gollm_openai/prompts/config_from_document.py
new file mode 100644
index 0000000000..10e4450dea
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/config_from_document.py
@@ -0,0 +1,44 @@
+CONFIGURE_FROM_DOCUMENT_PROMPT = """
+You are a helpful agent designed to find multiple model configurations for a given AMR model of various conditions described in a research paper and the initials and parameters that make up those conditions.
+For context, initials represent the initial state of the system through the initial distribution of tokens across the places, known as the initial marking. Each place corresponds to a variable or state in the system, such as a species concentration in a reaction, and the number of tokens reflects the initial conditions of the ODEs. Parameters define the system's rules and rates of evolution, including transition rates (analogous to reaction rates in ODEs) that determine how quickly tokens move between places. These parameters also include stoichiometric relationships, represented by the weights of arcs connecting places and transitions, dictating how many tokens are consumed or produced when a transition occurs.
+
+Use the following AMR model JSON file as a reference:
+
+---START AMR MODEL JSON---
+{amr}
+---END AMR MODEL JSON---
+
+Use the following user-provided text as the research paper to answer the query:
+
+---START USER-PROVIDED TEXT---
+{research_paper}
+---END USER-PROVIDED TEXT---
+
+Assume that the user-provided text describes multiple conditions to which the model can be applied. Create a model configuration for each condition.
+Be sure to extract parameter values and initial values from the user-provided text, and do not use the default values from the AMR model.
+Be sure to use consistent naming conventions for the conditions. Instead of 'condition_1' and 'condition_2', use descriptive names.
+
+For each condition, create a model configuration JSON object that satisfies the JSON schema specified in the response format. To do this, follow the instructions below:
+1. Create a value for `name` and `description` from the user-provided text.
+2. For the description, provide a long-form description of the condition. If the description cannot be created from the user-provided text, set it to an empty string.
+3. `model_id` is a UUID. If the AMR model has an id, you can use it. Otherwise, you can set it as the nil UUID "00000000-0000-0000-0000-000000000000".
+4. For each initial specified in the AMR model ODE semantics, create an initial semantic object. Do not create new initial semantic objects if they are not included in the original AMR model. You should set initial semantic object fields using the following rules:
+ a. `target` should reference the id of the initial variable from the AMR model ODE semantics.
+ b. `source` should reference the title or file name of the research paper.
+ c. `type` should be set to "initial".
+ d. You should extract a numerical value or an expression of the initial state from the user-provided text if possible and add it to `expression`
+ e. `expression_mathml` should be `expression` written in MathML format.
+5. For each parameter specified in the AMR model ODE semantics, create a parameter semantic object. Do not create new parameter semantic objects if they are not included in the original AMR model. You should set parameter semantic object fields using the following rules:
+ a. `reference_id` should reference the id of the parameter.
+ b. `source` should reference the title or file name of the research paper.
+ c. `type` should be set to "parameter".
+ d. Be sure to extract parameter values from the user-provided text, and do not use the default values from the AMR model.
+ - If the extracted parameter value is a single constant value, set the parameter `value` to the constant value and set `type` to "Constant".
+ - If the extracted parameter value is a distribution with a maximum and minimum value, set `type` to only "Uniform" and populate the `minimum` and `maximum` fields.
+6. `observableSemanticList` should be an empty list.
+7. `inferredParameterList` should be an empty list.
+
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+Answer:
+"""
diff --git a/packages/gollm/gollm_openai/prompts/equations_from_image.py b/packages/gollm/gollm_openai/prompts/equations_from_image.py
new file mode 100644
index 0000000000..b240341b02
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/equations_from_image.py
@@ -0,0 +1,8 @@
+EQUATIONS_FROM_IMAGE_PROMPT = """
+This image will contain one or more equations. For each equation, create a LaTeX representation of the equation.
+Multiple equations will probably be vertically separated, they may be enumerated by numbers or letters. Do not include these enumerations in the LaTeX equation.
+
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+Answer:
+"""
diff --git a/packages/gollm/gollm_openai/prompts/general_instruction.py b/packages/gollm/gollm_openai/prompts/general_instruction.py
new file mode 100644
index 0000000000..52271d558e
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/general_instruction.py
@@ -0,0 +1,8 @@
+GENERAL_INSTRUCTION_PROMPT = """
+You are a helpful agent designed to generate a response based on a given instruction. Your goal is to provide a response that is detailed, accurate, and fully addresses the user's request.
+Given the following user instruction:
+
+{instruction}
+
+Please ensure your response is relevant, comprehensive, clear, and supported with specific examples, if applicable.
+"""
diff --git a/packages/gollm/gollm_openai/prompts/interventions_from_document.py b/packages/gollm/gollm_openai/prompts/interventions_from_document.py
new file mode 100644
index 0000000000..d594c2db99
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/interventions_from_document.py
@@ -0,0 +1,36 @@
+INTERVENTIONS_FROM_DOCUMENT_PROMPT = """
+You are a helpful agent designed to find intervention policies for a given AMR model described in a research paper.
+For context, intervention policies can include multiple interventions that include only static interventions or dynamic interventions.
+Static interventions are applied at a specific point in time and permanently change the value of a specific parameter or state.
+Dynamic interventions try to keep a specific parameter from going above or below a threshold value.
+
+Use the following AMR model JSON file as a reference:
+
+---START AMR MODEL JSON---
+{amr}
+---END AMR MODEL JSON---
+
+Use the following user-provided text as the research paper to answer the query:
+
+---START USER-PROVIDED TEXT---
+{research_paper}
+---END USER-PROVIDED TEXT---
+
+Assume that the user-provided text describes multiple intervention policies to apply to the model.
+For each intervention policy, create a list of interventions depending on what the text describes.
+Be sure to use a meaningful descriptive name for the intervention policy, instead of 'intervention_1' and 'intervention_2'.
+
+For each policy described in the paper, create an interventionPolicy object. To do this, follow the instructions below:
+1. Create a value for `name` and `description` from the user-provided text.
+2. For the description, provide a long-form description of the condition. If the description cannot be created from the user-provided text, set it to an empty string.
+3. `model_id` is a UUID. If the AMR model has an id, you can use it. Otherwise, you can set it as the nil UUID "00000000-0000-0000-0000-000000000000".
+4. For each intervention specified in the policy create an intervention object with the following rules.
+ a. Create a value for `name` from the user-provided text.
+ b. `appliedTo` should reference the id of the parameter or initial state of the AMR Model. If you cannot find an initial state or parameter that matches the intervention, do not create an intervention object.
+ c. `type` should be either "state" or "parameter" depending on what the intervention is applied to.
+ d. create a list of either static or dynamic interventions, but not both.
+
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+Answer:
+"""
diff --git a/packages/gollm/gollm_openai/prompts/model_card.py b/packages/gollm/gollm_openai/prompts/model_card.py
new file mode 100644
index 0000000000..eea9ed598d
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/model_card.py
@@ -0,0 +1,24 @@
+INSTRUCTIONS = """
+You are a helpful agent designed to populate metadata of a given AMR model.
+
+You may have access to a document that describes the given AMR model and a JSON representation of the AMR model we want populated. Structural information should come from the AMR model.
+
+You may only have access to the model. Do your best to populate the JSON object specified in the response format with as much information as possible.
+If you cannot answer the entire query, provide as much information as possible. If there is no answer, populate fields with a null values. Do not leave any fields empty and do not make up information.
+
+Use the following document as a reference:
+
+---DOCUMENT START---
+{research_paper}
+---DOCUMENT END---
+
+Use the following JSON representation of an AMR model as a reference:
+
+---MODEL START---
+{amr}
+---MODEL END---
+
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+Answer:
+"""
diff --git a/packages/gollm/gollm_openai/prompts/model_meta_compare.py b/packages/gollm/gollm_openai/prompts/model_meta_compare.py
new file mode 100644
index 0000000000..8424981833
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/model_meta_compare.py
@@ -0,0 +1,21 @@
+MODEL_METADATA_COMPARE_PROMPT = """
+You are a helpful agent designed to compare multiple AMR models.
+
+Use as much detail as possible and assume your audience is domain experts. Use the following to decide how to compare the AMR models:
+ - If any of the AMR models have gollmCard information, only use those models to fill out metadataComparison fields. Do not include models that do not have gollmCard information in these comparison summaries.
+ - If no AMR models contain gollmCard information, leave metadataComparison fields blank.
+ - Fill in the semanticComparison fields for all models.
+
+All fields should be a short comparison summary explaining the differences and similarities between the models.
+Avoid making assumptions about the AMR models to maintain an objective perspective.
+Do not mention GollmCard or gollmCard. Refer to gollmCard as metadata.
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+---MODELS START---
+
+{amrs}
+
+---MODELS END---
+
+Comparison:
+"""
diff --git a/packages/gollm/gollm_openai/prompts/react.py b/packages/gollm/gollm_openai/prompts/react.py
new file mode 100644
index 0000000000..3ce9179464
--- /dev/null
+++ b/packages/gollm/gollm_openai/prompts/react.py
@@ -0,0 +1,49 @@
+SYSTEM_MESSAGE_PREFIX = """Answer the following questions as best you can. Assume that you are a conversational agent helping a user and any time-based knowledge of yours may be out of date, and should be looked up if you are given access to a tool that will enable you to do so. You have access to the following tools:"""
+FORMAT_INSTRUCTIONS = """The way you use the tools is by specifying a json blob.
+Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
+
+The only values that should be in the "action" field are: {tool_names}
+
+The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. DO NOT PUT the string 'json' in between the brackets and double quotes. Here is an example of a valid $JSON_BLOB:
+
+```
+{{{{
+ "action": $TOOL_NAME,
+ "action_input": $INPUT
+}}}}
+```
+
+Use the following format:
+
+Question: the input question you must answer
+Thought: you should always think about what to do, each thought corresponds to a single action and observation.
+Action:
+
+```
+$JSON_BLOB
+```
+Observation: the result of the action.
+Thought: Next thought
+Action:
+```
+$JSON_BLOB
+```
+Observation: the result of the next action.
+... (this Thought/Action/Observation can repeat multiple times.)
+
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question.
+
+"""
+SYSTEM_MESSAGE_SUFFIX = """Begin! Reminder to always use the exact characters `Final Answer` if you are certain of the final answer."""
+HUMAN_MESSAGE = "{input}\n\n{scratchpad}"
+ACT_OBS = """
+Action:
+```
+{{
+ 'action': {tool_name},
+ 'action_input': {arg}
+}}
+```
+Observation: {tool_name} returned {obs}
+"""
diff --git a/packages/gollm/gollm_openai/schemas/__init__.py b/packages/gollm/gollm_openai/schemas/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/packages/gollm/gollm_openai/schemas/compare_models.json b/packages/gollm/gollm_openai/schemas/compare_models.json
new file mode 100644
index 0000000000..d569791879
--- /dev/null
+++ b/packages/gollm/gollm_openai/schemas/compare_models.json
@@ -0,0 +1,77 @@
+{
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
+ "type": "object",
+ "properties": {
+ "title": {
+ "type": "string",
+ "description": "The title should say 'Comparison of' and the names of the models being compared."
+ },
+ "summary": {
+ "type": "string",
+ "description": "A brief summary of the comparison of models. This should include the main differences and similarities between the models."
+ },
+ "semanticComparison": {
+ "type": "object",
+ "properties": {
+ "states": {
+ "type": "string",
+ "description": "Explain the differences and similarities between the states of the models. For example, how many states are there in each model, and how do they differ?"
+ },
+ "parameters": {
+ "type": "string",
+ "description": "Explain the differences and similarities between the parameters of the models. For example, how many parameters are there in each model, and how do they differ?"
+ },
+ "transitions": {
+ "type": "string",
+ "description": "Explain the differences and similarities between the transitions of the models. For example, how many transitions are there in each model, and how do they differ?"
+ },
+ "observables": {
+ "type": "string",
+ "description": "Explain the differences and similarities between the observables of the models. For example, how many observables are there in each model, and how do they differ?"
+ }
+ },
+ "required": [
+ "states",
+ "parameters",
+ "transitions",
+ "observables"
+ ],
+ "additionalProperties": false
+ },
+ "metadataComparison": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "string",
+ "description": "Compare the content of the descriptions of the models. Explain any differences and similarities."
+ },
+ "uses": {
+ "type": "string",
+ "description": "Explain the differences and similarities between the uses of the models. For example, how are the models used differently, and how are they used similarly?"
+ },
+ "biasRisksLimitations": {
+ "type": "string",
+ "description": "Explain the differences and similarities between the bias, risks, and limitations of the models. For example, what are the biases, risks, and limitations of each model, and how do they differ?"
+ },
+ "testing": {
+ "type": "string",
+ "description": "Explain the differences and similarities between how the models were validated. For example, how does the validation process differ?"
+ }
+ },
+ "required": [
+ "description",
+ "uses",
+ "biasRisksLimitations",
+ "testing"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "title",
+ "summary",
+ "semanticComparison",
+ "metadataComparison"
+ ],
+ "additionalProperties": false
+}
diff --git a/packages/gollm/gollm_openai/schemas/configuration.json b/packages/gollm/gollm_openai/schemas/configuration.json
new file mode 100644
index 0000000000..2d85cb5e7d
--- /dev/null
+++ b/packages/gollm/gollm_openai/schemas/configuration.json
@@ -0,0 +1,275 @@
+{
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
+ "$defs": {
+ "modelConfiguration": {
+ "type": "object",
+ "description": "state and parameter configurations for a model",
+ "properties": {
+ "description": {
+ "type": [
+ "string",
+ "null"
+ ]
+ },
+ "inferredParameterList": {
+ "type": [
+ "array",
+ "null"
+ ],
+ "items": {
+ "type": "object",
+ "properties": {
+ "distribution": {
+ "type": "object",
+ "properties": {
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": [
+ "number",
+ "null"
+ ]
+ },
+ "minimum": {
+ "type": [
+ "number",
+ "null"
+ ]
+ },
+ "maximum": {
+ "type": [
+ "number",
+ "null"
+ ]
+ }
+ },
+ "required": [
+ "value",
+ "minimum",
+ "maximum"
+ ],
+ "additionalProperties": false
+ },
+ "type": {
+ "type": "string"
+ }
+ },
+ "required": [
+ "parameters",
+ "type"
+ ],
+ "additionalProperties": false
+ },
+ "referenceId": {
+ "type": "string"
+ },
+ "source": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "initial",
+ "parameter",
+ "observable",
+ "inferred"
+ ]
+ }
+ },
+ "required": [
+ "distribution",
+ "referenceId",
+ "source",
+ "type"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "initialSemanticList": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "expression": {
+ "type": "string"
+ },
+ "expressionMathml": {
+ "type": "string"
+ },
+ "source": {
+ "type": "string"
+ },
+ "target": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "initial",
+ "parameter",
+ "observable",
+ "inferred"
+ ]
+ }
+ },
+ "required": [
+ "expression",
+ "expressionMathml",
+ "source",
+ "target",
+ "type"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "modelId": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "observableSemanticList": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "expression": {
+ "type": "string"
+ },
+ "expressionMathml": {
+ "type": "string"
+ },
+ "referenceId": {
+ "type": "string"
+ },
+ "source": {
+ "type": "string"
+ },
+ "states": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "initial",
+ "parameter",
+ "observable",
+ "inferred"
+ ]
+ }
+ },
+ "required": [
+ "expression",
+ "expressionMathml",
+ "referenceId",
+ "source",
+ "states",
+ "type"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "parameterSemanticList": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "distribution": {
+ "type": "object",
+ "properties": {
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": [
+ "number",
+ "null"
+ ]
+ },
+ "minimum": {
+ "type": [
+ "number",
+ "null"
+ ]
+ },
+ "maximum": {
+ "type": [
+ "number",
+ "null"
+ ]
+ }
+ },
+ "required": [
+ "value",
+ "minimum",
+ "maximum"
+ ],
+ "additionalProperties": false
+ },
+ "type": {
+ "type": "string"
+ }
+ },
+ "required": [
+ "parameters",
+ "type"
+ ],
+ "additionalProperties": false
+ },
+ "referenceId": {
+ "type": "string"
+ },
+ "source": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "initial",
+ "parameter",
+ "observable",
+ "inferred"
+ ]
+ }
+ },
+ "required": [
+ "distribution",
+ "referenceId",
+ "source",
+ "type"
+ ],
+ "additionalProperties": false
+ }
+ }
+ },
+ "required": [
+ "description",
+ "inferredParameterList",
+ "initialSemanticList",
+ "modelId",
+ "name",
+ "observableSemanticList",
+ "parameterSemanticList"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "type": "object",
+ "properties": {
+ "conditions": {
+ "type": "array",
+ "items": {
+ "$ref": "#/$defs/modelConfiguration"
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "conditions"
+ ],
+ "additionalProperties": false
+}
diff --git a/packages/gollm/gollm_openai/schemas/equations.json b/packages/gollm/gollm_openai/schemas/equations.json
new file mode 100644
index 0000000000..a0f4dd9537
--- /dev/null
+++ b/packages/gollm/gollm_openai/schemas/equations.json
@@ -0,0 +1,18 @@
+{
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
+ "type": "object",
+ "properties": {
+ "equations": {
+ "type": "array",
+ "description": "A list of equations defined in an image.",
+ "items": {
+ "type": "string",
+ "description": "A LaTeX representation of an equation."
+ }
+ }
+ },
+ "required": [
+ "equations"
+ ],
+ "additionalProperties": false
+}
diff --git a/packages/gollm/gollm_openai/schemas/intervention_policy.json b/packages/gollm/gollm_openai/schemas/intervention_policy.json
new file mode 100644
index 0000000000..a69783c97f
--- /dev/null
+++ b/packages/gollm/gollm_openai/schemas/intervention_policy.json
@@ -0,0 +1,125 @@
+{
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
+ "$defs": {
+ "intervention": {
+ "type": "object",
+ "description": "A list of interventions that can be applied to the model.",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "appliedTo": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "state",
+ "parameter"
+ ]
+ },
+ "staticInterventions": {
+ "type": "array",
+ "description": "A list of static interventions that can be applied to the model.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "timestep": {
+ "type": "number",
+ "description": "The timestep at which the intervention is applied"
+ },
+ "value": {
+ "type": "number",
+ "description": "The new value of the state or parameter"
+ }
+ },
+ "required": [
+ "timestep",
+ "value"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "dynamicInterventions": {
+ "type": "array",
+ "description": "A list of dynamic interventions that can be applied to the model.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "parameter": {
+ "type": "string",
+ "description": "The parameter to which the intervention is applied"
+ },
+ "threshold": {
+ "type": "string",
+ "description": "The threshold value which the parameter can't go above or below"
+ },
+ "value": {
+ "type": "string",
+ "description": "The new value of the state or parameter"
+ },
+ "isGreaterThan": {
+ "type": "string",
+ "description": "Whether the parameter should be greater than or less than the threshold"
+ }
+ },
+ "required": [
+ "parameter",
+ "threshold",
+ "value",
+ "isGreaterThan"
+ ],
+ "additionalProperties": false
+ }
+ }
+ },
+ "required": [
+ "name",
+ "appliedTo",
+ "type",
+ "staticInterventions",
+ "dynamicInterventions"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "type": "object",
+ "properties": {
+ "interventionPolicies": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "modelId": {
+ "type": "string",
+ "description": "The ID of the AMR model to which the intervention policy applies."
+ },
+ "name": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "interventions": {
+ "type": "array",
+ "items": {
+ "$ref": "#/$defs/intervention"
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "modelId",
+ "name",
+ "description",
+ "interventions"
+ ],
+ "additionalProperties": false
+ }
+ }
+ },
+ "required": [
+ "interventionPolicies"
+ ],
+ "additionalProperties": false
+}
diff --git a/packages/gollm/gollm_openai/schemas/model_card.json b/packages/gollm/gollm_openai/schemas/model_card.json
new file mode 100644
index 0000000000..1771bd1455
--- /dev/null
+++ b/packages/gollm/gollm_openai/schemas/model_card.json
@@ -0,0 +1,218 @@
+{
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
+ "type": "object",
+ "properties": {
+ "summary": {
+ "type": "object",
+ "properties": {
+ "modelSummary": {
+ "type": "string",
+ "description": "A brief description of the system or process."
+ }
+ },
+ "required": [
+ "modelSummary"
+ ],
+ "additionalProperties": false
+ },
+ "details": {
+ "type": "object",
+ "properties": {
+ "modelDescription": {
+ "type": "string",
+ "description": "Describe the structure of the model in the paper, including its places, transitions, and arcs. Mention if it can likely be represented in petrinet format."
+ },
+ "fundedBy": {
+ "type": "string",
+ "description": "If applicable, list the funding sources."
+ },
+ "modelType": {
+ "type": "string",
+ "description": "Mathematical / Graphical Model / Other"
+ }
+ },
+ "required": [
+ "modelDescription",
+ "fundedBy",
+ "modelType"
+ ],
+ "additionalProperties": false
+ },
+ "uses": {
+ "type": "object",
+ "properties": {
+ "directUse": {
+ "type": "string",
+ "description": "Explain how the model can be used to analyze or simulate specific systems or processes."
+ },
+ "outOfScopeUse": {
+ "type": "string",
+ "description": "Describe scenarios where using the model would be inappropriate or misleading."
+ }
+ },
+ "required": [
+ "directUse",
+ "outOfScopeUse"
+ ],
+ "additionalProperties": false
+ },
+ "biasRisksLimitations": {
+ "type": "object",
+ "properties": {
+ "modelBiasRisksLimitations": {
+ "type": "string",
+ "description": "Describe sources of bias and risk based on the research paper"
+ }
+ },
+ "required": [
+ "modelBiasRisksLimitations"
+ ],
+ "additionalProperties": false
+ },
+ "testing": {
+ "type": "object",
+ "properties": {
+ "testingDataFactorsMetrics": {
+ "type": "string",
+ "description": "Describe how the model was validated, e.g., through simulation, comparison with real-world data, etc."
+ }
+ },
+ "required": [
+ "testingDataFactorsMetrics"
+ ],
+ "additionalProperties": false
+ },
+ "specs": {
+ "type": "object",
+ "properties": {
+ "modelSpecs": {
+ "type": "string",
+ "description": "Details about the model's complexity, such as the number of places, transitions, parameter count, and arcs."
+ }
+ },
+ "required": [
+ "modelSpecs"
+ ],
+ "additionalProperties": false
+ },
+ "glossary": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "authors": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "gettingStarted": {
+ "type": "object",
+ "properties": {
+ "examples": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "examples"
+ ],
+ "additionalProperties": false
+ },
+ "citations": {
+ "type": "object",
+ "properties": {
+ "references": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "references"
+ ],
+ "additionalProperties": false
+ },
+ "moreInformation": {
+ "type": "object",
+ "properties": {
+ "links": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "links"
+ ],
+ "additionalProperties": false
+ },
+ "structuralInformation": {
+ "type": "object",
+ "properties": {
+ "schemaName": {
+ "type": "string"
+ },
+ "parameterNames": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "domain": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "modelType": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "modelStructure": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "modelParameters": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "schemaName",
+ "parameterNames",
+ "domain",
+ "modelType",
+ "modelStructure",
+ "modelParameters"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "summary",
+ "details",
+ "uses",
+ "biasRisksLimitations",
+ "testing",
+ "specs",
+ "glossary",
+ "authors",
+ "gettingStarted",
+ "citations",
+ "moreInformation",
+ "structuralInformation"
+ ],
+ "additionalProperties": false
+}
diff --git a/packages/gollm/gollm_openai/tool_utils.py b/packages/gollm/gollm_openai/tool_utils.py
new file mode 100644
index 0000000000..0aefb52f04
--- /dev/null
+++ b/packages/gollm/gollm_openai/tool_utils.py
@@ -0,0 +1,410 @@
+import base64
+import imghdr
+import json
+import os
+from gollm_openai.prompts.amr_enrichment import ENRICH_PROMPT
+from gollm_openai.prompts.condense import CONDENSE_PROMPT, format_chunks
+from gollm_openai.prompts.config_from_dataset import (
+ CONFIGURE_FROM_DATASET_PROMPT,
+ CONFIGURE_FROM_DATASET_DATASET_PROMPT,
+ CONFIGURE_FROM_DATASET_MAPPING_PROMPT,
+ CONFIGURE_FROM_DATASET_TIMESERIES_PROMPT,
+ CONFIGURE_FROM_DATASET_AMR_PROMPT,
+ CONFIGURE_FROM_DATASET_MATRIX_PROMPT
+)
+from gollm_openai.prompts.config_from_document import CONFIGURE_FROM_DOCUMENT_PROMPT
+from gollm_openai.prompts.equations_from_image import EQUATIONS_FROM_IMAGE_PROMPT
+from gollm_openai.prompts.general_instruction import GENERAL_INSTRUCTION_PROMPT
+from gollm_openai.prompts.interventions_from_document import INTERVENTIONS_FROM_DOCUMENT_PROMPT
+from gollm_openai.prompts.model_card import INSTRUCTIONS
+from gollm_openai.prompts.model_meta_compare import MODEL_METADATA_COMPARE_PROMPT
+from openai import OpenAI
+from openai.types.chat.completion_create_params import ResponseFormat
+from typing import List
+from utils import (
+ exceeds_tokens,
+ model_config_adapter,
+ normalize_greek_alphabet,
+ parse_param_initials,
+ postprocess_oai_json,
+ validate_schema
+)
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def escape_curly_braces(text: str):
+ # Escape curly braces
+ return text.replace("{", "{{").replace("}", "}}")
+
+
+def get_image_format_string(image_format: str) -> str:
+ if not image_format:
+ raise ValueError("Invalid image format.")
+
+ format_strings = {
+ "rgb": f"data:image/rgb:base64,",
+ "gif": f"data:image/gif:base64,",
+ "pbm": f"data:image/pbm:base64,",
+ "pgm": f"data:image/pgm:base64,",
+ "ppm": f"data:image/ppm:base64,",
+ "tiff": f"data:image/tiff:base64,",
+ "rast": f"data:image/rast:base64,",
+ "xbm": f"data:image/xbm:base64,",
+ "jpeg": f"data:image/jpeg:base64,",
+ "bmp": f"data:image/bmp:base64,",
+ "png": f"data:image/png:base64,",
+ "webp": f"data:image/webp:base64,",
+ "exr": f"data:image/exr:base64,"
+ }
+ return format_strings.get(image_format.lower())
+
+
+def equations_from_image(image: str) -> dict:
+ print("Translating equations from image...")
+
+ print("Validating and encoding image...")
+ base64_image_str = get_image_format_string(
+ imghdr.what(None, h=base64.b64decode(image))
+ )
+
+ print("Uploading and validating equations schema...")
+ config_path = os.path.join(SCRIPT_DIR, 'schemas', 'equations.json')
+ with open(config_path, 'r') as config_file:
+ response_schema = json.load(config_file)
+ validate_schema(response_schema)
+
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-mini",
+ top_p=1,
+ frequency_penalty=0,
+ presence_penalty=0,
+ temperature=0,
+ seed=234,
+ max_tokens=1024,
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "equations",
+ "schema": response_schema
+ }
+ },
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": EQUATIONS_FROM_IMAGE_PROMPT},
+ {"type": "image_url", "image_url": {"url": f"{base64_image_str}{image}"}}
+ ]
+ },
+ ],
+ )
+ print("Received response from OpenAI API. Formatting response to work with HMI...")
+ output_json = json.loads(output.choices[0].message.content)
+
+ print("There are", len(output_json['equations']), "equations identified from the image.")
+
+ return output_json
+
+
+def interventions_from_document(research_paper: str, amr: str) -> dict:
+ print("Extracting and formatting research paper...")
+ research_paper = normalize_greek_alphabet(research_paper)
+
+ print("Uploading and validating intervention policy schema...")
+ config_path = os.path.join(SCRIPT_DIR, 'schemas', 'intervention_policy.json')
+ with open(config_path, 'r') as config_file:
+ response_schema = json.load(config_file)
+ validate_schema(response_schema)
+
+ print("Building prompt to extract model configurations from a research paper...")
+ prompt = INTERVENTIONS_FROM_DOCUMENT_PROMPT.format(
+ amr=escape_curly_braces(amr),
+ research_paper=escape_curly_braces(research_paper)
+ )
+
+ print("Sending request to OpenAI API...")
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-2024-08-06",
+ frequency_penalty=0,
+ max_tokens=4000,
+ presence_penalty=0,
+ seed=905,
+ temperature=0,
+ top_p=1,
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "intervention_policy",
+ "schema": response_schema
+ }
+ },
+ messages=[
+ {"role": "user", "content": prompt},
+ ]
+ )
+
+ print("Received response from OpenAI API. Formatting response to work with HMI...")
+ output_json = json.loads(output.choices[0].message.content)
+
+ print("There are ", len(output_json["interventionPolicies"]), "intervention policies identified from the text.")
+
+ return output_json
+
+
+def model_config_from_document(research_paper: str, amr: str) -> dict:
+ print("Extracting and formatting research paper...")
+ research_paper = normalize_greek_alphabet(research_paper)
+
+ print("Uploading and validating model configuration schema...")
+ config_path = os.path.join(SCRIPT_DIR, 'schemas', 'configuration.json')
+ with open(config_path, 'r') as config_file:
+ response_schema = json.load(config_file)
+ validate_schema(response_schema)
+
+ print("Building prompt to extract model configurations from a research paper...")
+ prompt = CONFIGURE_FROM_DOCUMENT_PROMPT.format(
+ amr=escape_curly_braces(amr),
+ research_paper=escape_curly_braces(research_paper)
+ )
+
+ print("Sending request to OpenAI API...")
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-2024-08-06",
+ frequency_penalty=0,
+ max_tokens=4000,
+ presence_penalty=0,
+ seed=905,
+ temperature=0,
+ top_p=1,
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "model_configurations",
+ "schema": response_schema
+ }
+ },
+ messages=[
+ {"role": "user", "content": prompt},
+ ]
+ )
+
+ print("Received response from OpenAI API. Formatting response to work with HMI...")
+ output_json = json.loads(output.choices[0].message.content)
+
+ print("There are ", len(output_json["conditions"]), "conditions identified from the text.")
+
+ return model_config_adapter(output_json)
+
+
+def amr_enrichment_chain(amr: str, research_paper: str) -> dict:
+ amr_param_states = parse_param_initials(amr)
+ prompt = ENRICH_PROMPT.format(
+ param_initial_dict=amr_param_states,
+ paper_text=escape_curly_braces(research_paper)
+ )
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-2024-08-06",
+ max_tokens=4000,
+ top_p=1,
+ frequency_penalty=0,
+ presence_penalty=0,
+ seed=123,
+ temperature=0,
+ response_format={"type": "json_object"},
+ messages=[
+ {"role": "user", "content": prompt},
+ ],
+ )
+ return postprocess_oai_json(output.choices[0].message.content)
+
+
+def model_card_chain(amr: str, research_paper: str = None) -> dict:
+ print("Creating model card...")
+ assert amr, "An AMR model must be provided."
+ if not research_paper:
+ research_paper = "NO RESEARCH PAPER PROVIDED"
+
+ print("Uploading and validating model card schema...")
+ config_path = os.path.join(SCRIPT_DIR, 'schemas', 'model_card.json')
+ with open(config_path, 'r') as config_file:
+ response_schema = json.load(config_file)
+ validate_schema(response_schema)
+
+ prompt = INSTRUCTIONS.format(
+ research_paper=escape_curly_braces(research_paper),
+ amr=escape_curly_braces(amr)
+ )
+
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-2024-08-06",
+ temperature=0,
+ frequency_penalty=0,
+ max_tokens=4000,
+ presence_penalty=0,
+ seed=123,
+ top_p=1,
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "model_card",
+ "schema": response_schema
+ }
+ },
+ messages=[
+ {"role": "user", "content": prompt},
+ ],
+ )
+
+ print("Received response from OpenAI API. Formatting response to work with HMI...")
+ output_json = json.loads(output.choices[0].message.content)
+
+ return output_json
+
+
+def condense_chain(query: str, chunks: List[str], max_tokens: int = 16385) -> str:
+ print("Condensing chunks for query: {}".format(query[:100]))
+ prompt = CONDENSE_PROMPT.format(query=query, chunks=format_chunks(chunks))
+ if exceeds_tokens(prompt, max_tokens):
+ raise ValueError(
+ "Prompt exceeds max tokens. Reduce number of chunks by reducing K in KNN search."
+ )
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-2024-08-06",
+ top_p=1,
+ frequency_penalty=0,
+ presence_penalty=0,
+ temperature=0,
+ seed=123,
+ max_tokens=1024,
+ messages=[
+ {"role": "user", "content": prompt},
+ ],
+ )
+ return output.choices[0].message.content
+
+
+def generate_response(instruction: str, response_format: ResponseFormat | None = None) -> str:
+ prompt = GENERAL_INSTRUCTION_PROMPT.format(instruction=instruction)
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-2024-08-06",
+ top_p=1,
+ frequency_penalty=0,
+ presence_penalty=0,
+ temperature=0,
+ seed=123,
+ max_tokens=2048,
+ response_format=response_format,
+ messages=[
+ {"role": "user", "content": prompt},
+ ],
+ )
+ return output.choices[0].message.content
+
+
+def embedding_chain(text: str) -> List:
+ print("Creating embeddings for text: {}".format(text[:100]))
+ client = OpenAI()
+ output = client.embeddings.create(model="text-embedding-ada-002", input=text)
+ return output.data[0].embedding
+
+
+def model_config_from_dataset(amr: str, dataset: List[str], matrix: str) -> dict:
+ print("Extracting datasets...")
+ dataset_text = os.linesep.join(dataset)
+
+ print("Uploading and validating model configuration schema...")
+ config_path = os.path.join(SCRIPT_DIR, 'schemas', 'configuration.json')
+ with open(config_path, 'r') as config_file:
+ response_schema = json.load(config_file)
+ validate_schema(response_schema)
+
+ print("Building prompt to extract model configurations from a dataset...")
+ prompt = (CONFIGURE_FROM_DATASET_PROMPT
+ + CONFIGURE_FROM_DATASET_MAPPING_PROMPT
+ + CONFIGURE_FROM_DATASET_TIMESERIES_PROMPT
+ + CONFIGURE_FROM_DATASET_AMR_PROMPT.format(amr=amr)
+ + CONFIGURE_FROM_DATASET_DATASET_PROMPT.format(data=dataset_text))
+
+ if matrix:
+ prompt += CONFIGURE_FROM_DATASET_MATRIX_PROMPT.format(matrix=matrix)
+
+ prompt += "Answer:"
+
+ print("Sending request to OpenAI API...")
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-2024-08-06",
+ frequency_penalty=0,
+ max_tokens=10000,
+ presence_penalty=0,
+ seed=123,
+ temperature=0,
+ top_p=1,
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "model_configurations",
+ "schema": response_schema
+ }
+ },
+ messages=[
+ {"role": "user", "content": prompt},
+ ],
+ )
+
+ print("Received response from OpenAI API. Formatting response to work with HMI...")
+ output_json = json.loads(output.choices[0].message.content)
+
+ print("There are ", len(output_json["conditions"]), "conditions identified from the datasets.")
+
+ return model_config_adapter(output_json)
+
+
+def compare_models(amrs: List[str]) -> dict:
+ print("Comparing models...")
+
+ print("Building prompt to compare models...")
+ joined_escaped_amrs = "\n\n------\n\n".join([escape_curly_braces(amr) for amr in amrs])
+ prompt = MODEL_METADATA_COMPARE_PROMPT.format(
+ amrs=joined_escaped_amrs
+ )
+
+ print("Uploading and validating compare models schema...")
+ config_path = os.path.join(SCRIPT_DIR, 'schemas', 'compare_models.json')
+ with open(config_path, 'r') as config_file:
+ response_schema = json.load(config_file)
+ validate_schema(response_schema)
+
+ client = OpenAI()
+ output = client.chat.completions.create(
+ model="gpt-4o-mini",
+ top_p=1,
+ frequency_penalty=0,
+ presence_penalty=0,
+ seed=123,
+ temperature=0,
+ max_tokens=2048,
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "compare_models",
+ "schema": response_schema
+ }
+ },
+ messages=[
+ {"role": "user", "content": prompt},
+ ]
+ )
+
+ print("Received response from OpenAI API. Formatting response to work with HMI...")
+ output_json = json.loads(output.choices[0].message.content)
+
+ return output_json
diff --git a/packages/gollm/requirements.txt b/packages/gollm/requirements.txt
new file mode 100644
index 0000000000..c0c2e97e85
--- /dev/null
+++ b/packages/gollm/requirements.txt
@@ -0,0 +1,9 @@
+## Used for docker image.
+fastapi
+openai==1.50.2
+pandas
+pydantic==2.9.2
+regex
+tiktoken
+uvicorn
+jsonschema
diff --git a/packages/gollm/setup.py b/packages/gollm/setup.py
index 9b69608f8d..b86b004612 100644
--- a/packages/gollm/setup.py
+++ b/packages/gollm/setup.py
@@ -1,21 +1,31 @@
from setuptools import setup, find_packages
setup(
- name="gollm_task",
+ name="gollm",
version="0.1.0",
packages=find_packages(),
- install_requires=[],
+ install_requires=[
+ "openai==1.50.2",
+ "pandas",
+ "pydantic==2.9.2",
+ "regex",
+ "tiktoken",
+ "jsonschema"
+ ],
+ package_data={
+ "gollm_openai.schemas": ["*.json"]
+ },
entry_points={
"console_scripts": [
- "gollm_task:compare_models=tasks.compare_models:main",
- "gollm_task:configure_model_from_dataset=tasks.configure_model_from_dataset:main",
- "gollm_task:configure_model_from_document=tasks.configure_model_from_document:main",
- "gollm_task:embedding=tasks.embedding:main",
- "gollm_task:enrich_amr=tasks.enrich_amr:main",
- "gollm_task:equations_from_image=tasks.equations_from_image:main",
- "gollm_task:generate_response=tasks.generate_response:main",
- "gollm_task:generate_summary=tasks.generate_summary:main",
- "gollm_task:model_card=tasks.model_card:main",
+ "gollm:compare_models=tasks.compare_models:main",
+ "gollm:configure_model_from_dataset=tasks.configure_model_from_dataset:main",
+ "gollm:configure_model_from_document=tasks.configure_model_from_document:main",
+ "gollm:embedding=tasks.embedding:main",
+ "gollm:enrich_amr=tasks.enrich_amr:main",
+ "gollm:equations_from_image=tasks.equations_from_image:main",
+ "gollm:generate_response=tasks.generate_response:main",
+ "gollm:generate_summary=tasks.generate_summary:main",
+ "gollm:model_card=tasks.model_card:main",
],
},
python_requires=">=3.11",
diff --git a/packages/gollm/tasks/compare_models.py b/packages/gollm/tasks/compare_models.py
index 715fdde5c0..302ed6926a 100644
--- a/packages/gollm/tasks/compare_models.py
+++ b/packages/gollm/tasks/compare_models.py
@@ -1,6 +1,6 @@
import sys
-from gollm.entities import ModelCompareModel
-from gollm.openai.tool_utils import compare_models
+from entities import ModelCompareModel
+from gollm_openai.tool_utils import compare_models
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/configure_model_from_dataset.py b/packages/gollm/tasks/configure_model_from_dataset.py
index acdd7cb75e..932ef16de3 100644
--- a/packages/gollm/tasks/configure_model_from_dataset.py
+++ b/packages/gollm/tasks/configure_model_from_dataset.py
@@ -1,7 +1,7 @@
import json
import sys
-from gollm.entities import ConfigureModelDataset
-from gollm.openai.tool_utils import model_config_from_dataset
+from entities import ConfigureModelDataset
+from gollm_openai.tool_utils import model_config_from_dataset
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/configure_model_from_document.py b/packages/gollm/tasks/configure_model_from_document.py
index 81a671bfa9..c1cbc8b67c 100644
--- a/packages/gollm/tasks/configure_model_from_document.py
+++ b/packages/gollm/tasks/configure_model_from_document.py
@@ -1,7 +1,7 @@
import json
import sys
-from gollm.entities import ConfigureModelDocument
-from gollm.openai.tool_utils import model_config_from_document
+from entities import ConfigureModelDocument
+from gollm_openai.tool_utils import model_config_from_document
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/embedding.py b/packages/gollm/tasks/embedding.py
index 3910d4cda7..140ec56564 100644
--- a/packages/gollm/tasks/embedding.py
+++ b/packages/gollm/tasks/embedding.py
@@ -1,7 +1,7 @@
import sys
+from entities import EmbeddingModel
+from gollm_openai.tool_utils import embedding_chain
-from gollm.entities import EmbeddingModel
-from gollm.openai.tool_utils import embedding_chain
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/enrich_amr.py b/packages/gollm/tasks/enrich_amr.py
index 110b7bcbc8..fb265b5e3f 100644
--- a/packages/gollm/tasks/enrich_amr.py
+++ b/packages/gollm/tasks/enrich_amr.py
@@ -1,6 +1,7 @@
import json
import sys
-from gollm.openai.tool_utils import amr_enrichment_chain
+from gollm_openai.tool_utils import amr_enrichment_chain
+
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/equations_from_image.py b/packages/gollm/tasks/equations_from_image.py
index c565433ff1..4066506760 100644
--- a/packages/gollm/tasks/equations_from_image.py
+++ b/packages/gollm/tasks/equations_from_image.py
@@ -1,6 +1,6 @@
import sys
-from gollm.entities import EquationsFromImage
-from gollm.openai.tool_utils import equations_from_image
+from entities import EquationsFromImage
+from gollm_openai.tool_utils import equations_from_image
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/generate_response.py b/packages/gollm/tasks/generate_response.py
index 687b67be5f..d58f48254d 100644
--- a/packages/gollm/tasks/generate_response.py
+++ b/packages/gollm/tasks/generate_response.py
@@ -1,5 +1,6 @@
import sys
-from gollm.openai.tool_utils import generate_response
+from gollm_openai.tool_utils import generate_response
+
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/generate_summary.py b/packages/gollm/tasks/generate_summary.py
index a7506958f6..a572e4c287 100644
--- a/packages/gollm/tasks/generate_summary.py
+++ b/packages/gollm/tasks/generate_summary.py
@@ -1,5 +1,6 @@
import sys
-from gollm.openai.tool_utils import generate_response
+from gollm_openai.tool_utils import generate_response
+
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/interventions_from_document.py b/packages/gollm/tasks/interventions_from_document.py
index 7c847250fd..f095bd8fbc 100644
--- a/packages/gollm/tasks/interventions_from_document.py
+++ b/packages/gollm/tasks/interventions_from_document.py
@@ -1,7 +1,7 @@
import json
import sys
-from gollm.entities import InterventionsFromDocument
-from gollm.openai.tool_utils import interventions_from_document
+from entities import InterventionsFromDocument
+from gollm_openai.tool_utils import interventions_from_document
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/tasks/model_card.py b/packages/gollm/tasks/model_card.py
index 34d73e2c06..adfb1e2d25 100644
--- a/packages/gollm/tasks/model_card.py
+++ b/packages/gollm/tasks/model_card.py
@@ -1,6 +1,6 @@
import sys
-from gollm.entities import ModelCardModel
-from gollm.openai.tool_utils import model_card_chain
+from entities import ModelCardModel
+from gollm_openai.tool_utils import model_card_chain
from taskrunner import TaskRunnerInterface
diff --git a/packages/gollm/utils.py b/packages/gollm/utils.py
new file mode 100644
index 0000000000..6e7b595912
--- /dev/null
+++ b/packages/gollm/utils.py
@@ -0,0 +1,145 @@
+import json
+import jsonschema
+import regex as re
+import tiktoken
+
+
+def remove_references(text: str) -> str:
+ """
+ Removes reference sections from a scientific paper.
+ """
+ pattern = r"References\n([\s\S]*?)(?:\n\n|\Z)"
+ new_text = re.sub(pattern, "", text)
+ return new_text.strip()
+
+
+def parse_param_initials(amr: dict):
+ try:
+ ode = amr['semantics']['ode']
+ except KeyError:
+        raise KeyError("ODE semantics not found in AMR, please provide a valid AMR with structure semantics.ode")
+
+    assert 'parameters' in ode, "No parameters found in ODE semantics, please provide a valid AMR with structure semantics.ode.parameters"
+ assert 'initials' in ode, "No initials found in ODE semantics, please provide a valid AMR with structure semantics.ode.initials"
+
+ params = ode['parameters']
+
+ assert all(['id' in p.keys() for p in params]), "All parameters must have an 'id' key"
+
+ param_ids = [p['id'] for p in params if p is not None and p.get('id')]
+
+ initials = ode['initials']
+
+    assert all(['target' in i.keys() for i in initials]), "All initials must have a 'target' key"
+
+ initial_ids = [i['target'] for i in initials if i is not None and i.get('target')]
+
+ return {'initial_names': initial_ids, 'param_names': param_ids}
+
+
+def parse_json_from_markdown(text):
+ print("Stripping markdown...")
+ json_pattern = r"```json\s*(\{.*?\})\s*```"
+ match = re.search(json_pattern, text, re.DOTALL)
+ if match:
+ return match.group(1)
+ else:
+ print(f"No markdown found in text: {text}")
+ return text
+
+
+def extract_json(text: str) -> dict:
+ corrected_text = text.replace("{{", "{").replace("}}", "}")
+ try:
+ json_obj = json.loads(corrected_text)
+ return json_obj
+ except json.JSONDecodeError as e:
+ raise ValueError(f"Error decoding JSON: {e}\nfrom text {text}")
+
+
+def validate_schema(schema):
+ try:
+ jsonschema.Draft202012Validator.check_schema(schema)
+ print("Schema is valid.")
+ except jsonschema.exceptions.SchemaError as e:
+ print(f"Schema is invalid: {e.message}")
+
+
+def postprocess_oai_json(output: str) -> dict:
+ output = "{" + parse_json_from_markdown(
+ output
+ ) # curly bracket is used in all prompts to denote start of json.
+ return extract_json(output)
+
+
+def normalize_greek_alphabet(text: str) -> str:
+ greek_to_english = {
+ "α": "alpha",
+ "β": "beta",
+ "γ": "gamma",
+ "δ": "delta",
+ "ε": "epsilon",
+ "ζ": "zeta",
+ "η": "eta",
+ "θ": "theta",
+ "ι": "iota",
+ "κ": "kappa",
+ "λ": "lambda",
+ "μ": "mu",
+ "ν": "nu",
+ "ξ": "xi",
+ "ο": "omicron",
+ "π": "pi",
+ "ρ": "rho",
+ "σ": "sigma",
+ "τ": "tau",
+ "υ": "upsilon",
+ "φ": "phi",
+ "χ": "chi",
+ "ψ": "psi",
+ "ω": "omega",
+ }
+
+ normalized_text = ""
+ for char in text:
+ if char.lower() in greek_to_english:
+ normalized_text += greek_to_english[char.lower()]
+ else:
+ normalized_text += char
+ return normalized_text
+
+
+def exceeds_tokens(prompt: str, max_tokens: int) -> bool:
+ enc = tiktoken.get_encoding("cl100k_base")
+ if len(enc.encode(prompt)) > max_tokens:
+ return True
+ return False
+
+
+# Adapter function which converts the model config dict to HMI expected format.
+def model_config_adapter(model_config: dict) -> dict:
+ # for each condition and for each parameter semantic in the model configuration,
+    # if the distribution is not `constant`, remove the `value` key
+ # otherwise, remove the maximum and minimum keys.
+ for condition in model_config["conditions"]:
+ print(condition)
+ for param in condition["parameterSemanticList"]:
+ if param["distribution"]["type"].casefold() == "constant":
+ param["distribution"]["type"] = "Constant"
+ param["distribution"]["parameters"].pop("minimum", None)
+ param["distribution"]["parameters"].pop("maximum", None)
+ elif param["distribution"]["type"].casefold() == "uniform":
+ param["distribution"]["type"] = "Uniform"
+ param["distribution"]["parameters"].pop("value", None)
+ else:
+ if "value" in param["distribution"]["parameters"]:
+ param["distribution"]["type"] = "Constant"
+ param["distribution"]["parameters"].pop("minimum", None)
+ param["distribution"]["parameters"].pop("maximum", None)
+ elif "minimum" in param["distribution"]["parameters"] and "maximum" in param["distribution"]["parameters"]:
+ param["distribution"]["type"] = "Uniform"
+ param["distribution"]["parameters"].pop("value", None)
+ else:
+ raise ValueError("Invalid distribution type")
+
+ return model_config
diff --git a/packages/server/gradle/wrapper/gradle-wrapper.properties b/packages/server/gradle/wrapper/gradle-wrapper.properties
index 9355b41557..df97d72b8b 100644
--- a/packages/server/gradle/wrapper/gradle-wrapper.properties
+++ b/packages/server/gradle/wrapper/gradle-wrapper.properties
@@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/gollm/GoLLMController.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/gollm/GoLLMController.java
index ec1fdda042..1b8548516e 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/gollm/GoLLMController.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/gollm/GoLLMController.java
@@ -40,6 +40,7 @@
import org.springframework.web.server.ResponseStatusException;
import software.uncharted.terarium.hmiserver.models.dataservice.dataset.Dataset;
import software.uncharted.terarium.hmiserver.models.dataservice.document.DocumentAsset;
+import software.uncharted.terarium.hmiserver.models.dataservice.document.ExtractedDocumentPage;
import software.uncharted.terarium.hmiserver.models.dataservice.model.Model;
import software.uncharted.terarium.hmiserver.models.task.TaskRequest;
import software.uncharted.terarium.hmiserver.models.task.TaskRequest.TaskType;
@@ -142,7 +143,7 @@ public ResponseEntity createModelCardTask(
}
final ModelCardResponseHandler.Input input = new ModelCardResponseHandler.Input();
- input.setAmr(model.get().serializeWithoutTerariumFields());
+ input.setAmr(model.get().serializeWithoutTerariumFields(null, new String[] { "gollmCard" }));
// Grab the document
final DocumentAsset document;
@@ -267,11 +268,32 @@ public ResponseEntity createConfigureModelFromDocumentTask(
}
final ConfigureModelFromDocumentResponseHandler.Input input = new ConfigureModelFromDocumentResponseHandler.Input();
- input.setResearchPaper(document.get().getText());
+
+ String text = "";
+ if (document.get().getExtractions().size() > 0) {
+ for (final ExtractedDocumentPage page : document.get().getExtractions()) {
+ text += page.getText() + "\n";
+ if (page.getTables() != null) {
+ for (final JsonNode table : page.getTables()) {
+ text += table.toString() + "\n";
+ }
+ }
+ if (page.getEquations() != null) {
+ for (final JsonNode equation : page.getEquations()) {
+ text += equation.toString() + "\n";
+ }
+ }
+ }
+ } else {
+ text = document.get().getText();
+ }
+
+ input.setResearchPaper(text);
+
// stripping the metadata from the model before its sent since it can cause
// gollm to fail with massive inputs
model.get().setMetadata(null);
- input.setAmr(model.get().serializeWithoutTerariumFieldsKeepId());
+ input.setAmr(model.get().serializeWithoutTerariumFields(new String[] { "id" }, null));
// Create the task
final TaskRequest req = new TaskRequest();
@@ -399,7 +421,7 @@ public ResponseEntity createConfigureModelFromDatasetTask(
// stripping the metadata from the model before its sent since it can cause
// gollm to fail with massive inputs
model.get().setMetadata(null);
- input.setAmr(model.get().serializeWithoutTerariumFields());
+ input.setAmr(model.get().serializeWithoutTerariumFields(null, null));
// set matrix string if provided
if (body != null && !body.getMatrixStr().isEmpty()) {
@@ -509,7 +531,7 @@ public ResponseEntity createInterventionsFromDocumentTask(
// stripping the metadata from the model before its sent since it can cause
// gollm to fail with massive inputs
model.get().setMetadata(null);
- input.setAmr(model.get().serializeWithoutTerariumFieldsKeepId());
+ input.setAmr(model.get().serializeWithoutTerariumFields(new String[] { "id" }, null));
// Create the task
final TaskRequest req = new TaskRequest();
@@ -597,7 +619,7 @@ public ResponseEntity createCompareModelsTask(
throw new ResponseStatusException(HttpStatus.NOT_FOUND, messages.get("model.not-found"));
}
- amrs.add(model.get().serializeWithoutTerariumFields());
+ amrs.add(model.get().serializeWithoutTerariumFields(null, null));
}
// if the number of models is less than 2, return an error
@@ -863,7 +885,7 @@ public ResponseEntity equationsFromImageTask(
byte[] decodedImage;
try {
decodedImage = Base64.getDecoder().decode(image);
- } catch (IllegalArgumentException e) {
+ } catch (final IllegalArgumentException e) {
log.error("Invalid base64 encoding for image", e);
throw new ResponseStatusException(
HttpStatus.BAD_REQUEST,
@@ -873,8 +895,8 @@ public ResponseEntity equationsFromImageTask(
// validate that the image is a valid image
try (ByteArrayInputStream bais = new ByteArrayInputStream(decodedImage)) {
- BufferedImage bi = ImageIO.read(bais);
- } catch (IOException e) {
+ final BufferedImage bi = ImageIO.read(bais);
+ } catch (final IOException e) {
log.error("Invalid image provided", e);
throw new ResponseStatusException(
HttpStatus.BAD_REQUEST,
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/mira/MiraController.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/mira/MiraController.java
index 7a68e59834..8b454fa197 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/mira/MiraController.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/controller/mira/MiraController.java
@@ -130,7 +130,7 @@ public ResponseEntity convertAMRtoMMT(@RequestBody final JsonNode mode
req.setType(TaskType.MIRA);
try {
- req.setInput(objectMapper.treeToValue(model, Model.class).serializeWithoutTerariumFields().getBytes());
+ req.setInput(objectMapper.treeToValue(model, Model.class).serializeWithoutTerariumFields(null, null).getBytes());
} catch (final Exception e) {
log.error("Unable to serialize input", e);
throw new ResponseStatusException(HttpStatus.INTERNAL_SERVER_ERROR, messages.get("generic.io-error.write"));
@@ -190,7 +190,7 @@ public ResponseEntity generateModelLatex(@RequestBody final JsonNode m
req.setType(TaskType.MIRA);
try {
- req.setInput(objectMapper.treeToValue(model, Model.class).serializeWithoutTerariumFields().getBytes());
+ req.setInput(objectMapper.treeToValue(model, Model.class).serializeWithoutTerariumFields(null, null).getBytes());
} catch (final Exception e) {
log.error("Unable to serialize input", e);
throw new ResponseStatusException(HttpStatus.INTERNAL_SERVER_ERROR, messages.get("generic.io-error.write"));
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/TerariumAsset.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/TerariumAsset.java
index 6edeec1b8e..55cb5a3418 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/TerariumAsset.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/TerariumAsset.java
@@ -11,7 +11,10 @@
import jakarta.persistence.MappedSuperclass;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
import java.util.List;
+import javax.annotation.Nullable;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.Accessors;
@@ -77,44 +80,60 @@ protected TerariumAsset cloneSuperFields(final TerariumAsset asset) {
return asset;
}
- public String serializeWithoutTerariumFields() {
- final ObjectMapper mapper = new ObjectMapper();
- mapper.setConfig(mapper.getSerializationConfig().with(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY));
- final ObjectNode objectNode = mapper.convertValue(this, ObjectNode.class);
- objectNode.remove("id");
- objectNode.remove("createdOn");
- objectNode.remove("updatedOn");
- objectNode.remove("deletedOn");
- objectNode.remove("name");
- objectNode.remove("description");
- objectNode.remove("temporary");
- objectNode.remove("publicAsset");
- objectNode.remove("fileNames");
- objectNode.remove("userId");
-
- // Remove the field metadata.description as well
- final JsonNode metadata = objectNode.get("metadata");
- if (metadata != null) {
- ((ObjectNode) metadata).remove("description");
+ public static void removeFieldsWithKeys(ObjectNode objectNode, List keys) {
+ Iterator keysIterator = objectNode.fieldNames();
+ while (keysIterator.hasNext()) {
+ String key = keysIterator.next();
+ if (keys.contains(key)) {
+ keysIterator.remove();
+ } else {
+ JsonNode node = objectNode.get(key);
+ if (node.isObject()) {
+ removeFieldsWithKeys((ObjectNode) node, keys);
+ }
+ }
}
- objectNode.set("metadata", metadata);
-
- return objectNode.toString();
}
- public String serializeWithoutTerariumFieldsKeepId() {
+ /**
+ * Serialize the asset to a JSON string, removing the fields that are not needed.
+ *
+ * @param keepFields A list of fields that should not be removed.
+ * @param additionalDeleteFields Additional fields to remove.
+ * @return The JSON string.
+ */
+ public String serializeWithoutTerariumFields(
+ @Nullable String[] keepFields,
+ @Nullable String[] additionalDeleteFields
+ ) {
final ObjectMapper mapper = new ObjectMapper();
mapper.setConfig(mapper.getSerializationConfig().with(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY));
final ObjectNode objectNode = mapper.convertValue(this, ObjectNode.class);
- objectNode.remove("createdOn");
- objectNode.remove("updatedOn");
- objectNode.remove("deletedOn");
- objectNode.remove("name");
- objectNode.remove("description");
- objectNode.remove("temporary");
- objectNode.remove("publicAsset");
- objectNode.remove("fileNames");
- objectNode.remove("userId");
+
+ // Fields to delete
+ String[] deleteFields = new String[] {
+ "id",
+ "createdOn",
+ "updatedOn",
+ "deletedOn",
+ "name",
+ "description",
+ "temporary",
+ "publicAsset",
+ "fileNames",
+ "userId"
+ };
+ List deleteFieldsList = new ArrayList<>(Arrays.asList(deleteFields));
+ if (keepFields != null) {
+ for (String field : keepFields) {
+ deleteFieldsList.removeIf(key -> key.equals(field));
+ }
+ }
+
+ // Remove the fields that are not needed
+ for (String key : deleteFieldsList) {
+ objectNode.remove(key);
+ }
// Remove the field metadata.description as well
final JsonNode metadata = objectNode.get("metadata");
@@ -123,6 +142,11 @@ public String serializeWithoutTerariumFieldsKeepId() {
}
objectNode.set("metadata", metadata);
+ // Remove additional fields if they exist
+ if (additionalDeleteFields != null) {
+ removeFieldsWithKeys(objectNode, Arrays.asList(additionalDeleteFields));
+ }
+
return objectNode.toString();
}
}
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/dataservice/document/DocumentAsset.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/dataservice/document/DocumentAsset.java
index ef5109f94d..f549b72bd0 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/dataservice/document/DocumentAsset.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/dataservice/document/DocumentAsset.java
@@ -67,8 +67,14 @@ public class DocumentAsset extends TerariumAsset {
@TSOptional
@Type(JsonType.class)
@Column(columnDefinition = "json")
+ @Deprecated
private List assets;
+ @TSOptional
+ @Type(JsonType.class)
+ @Column(columnDefinition = "json")
+ private List extractions = new ArrayList<>();
+
@Override
public List getFileNames() {
if (this.fileNames == null) {
@@ -103,6 +109,9 @@ public DocumentAsset clone() {
clone.source = this.source;
clone.text = this.text;
+ for (final ExtractedDocumentPage extraction : this.extractions) {
+ clone.extractions.add(extraction.clone());
+ }
if (this.grounding != null) {
clone.grounding = this.grounding.clone();
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/dataservice/document/ExtractedDocumentPage.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/dataservice/document/ExtractedDocumentPage.java
new file mode 100644
index 0000000000..2d44997ef6
--- /dev/null
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/models/dataservice/document/ExtractedDocumentPage.java
@@ -0,0 +1,44 @@
+package software.uncharted.terarium.hmiserver.models.dataservice.document;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.Serial;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import lombok.Data;
+import lombok.experimental.Accessors;
+
+@Data
+@Accessors(chain = true)
+public class ExtractedDocumentPage implements Serializable {
+
+ @Serial
+ private static final long serialVersionUID = 7781295818378531195L;
+
+ Integer pageNumber;
+
+ String text;
+ List tables = new ArrayList<>();
+
+ List equations = new ArrayList<>();
+
+ @Override
+ public ExtractedDocumentPage clone() {
+ final ExtractedDocumentPage clone = new ExtractedDocumentPage();
+
+ clone.pageNumber = this.pageNumber;
+ clone.text = this.text;
+
+ clone.tables = new ArrayList<>();
+ for (final JsonNode table : this.tables) {
+ clone.tables.add(table.deepCopy());
+ }
+
+ clone.equations = new ArrayList<>();
+ for (final JsonNode equation : this.equations) {
+ clone.equations.add(equation.deepCopy());
+ }
+
+ return clone;
+ }
+}
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/ExtractionService.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/ExtractionService.java
index deb6945af2..1e2b6cc52b 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/ExtractionService.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/ExtractionService.java
@@ -46,6 +46,7 @@
import software.uncharted.terarium.hmiserver.models.TerariumAssetEmbeddings;
import software.uncharted.terarium.hmiserver.models.dataservice.document.DocumentAsset;
import software.uncharted.terarium.hmiserver.models.dataservice.document.DocumentExtraction;
+import software.uncharted.terarium.hmiserver.models.dataservice.document.ExtractedDocumentPage;
import software.uncharted.terarium.hmiserver.models.dataservice.document.ExtractionAssetType;
import software.uncharted.terarium.hmiserver.models.dataservice.model.Model;
import software.uncharted.terarium.hmiserver.models.dataservice.modelparts.ModelMetadata;
@@ -173,12 +174,12 @@ static class ExtractPDFResponse {
String documentAbstract;
String documentText;
+ List documentTextPerPage;
List files = new ArrayList<>();
List assets = new ArrayList<>();
List equations = new ArrayList<>();
List tables = new ArrayList<>();
ArrayNode variableAttributes;
- JsonNode gollmCard;
boolean partialFailure = true;
}
@@ -222,6 +223,7 @@ public ExtractPDFResponse runExtractPDF(
log.info("Text extraction complete for document: {}", documentName);
extractionResponse.documentAbstract = textExtraction.documentAbstract;
extractionResponse.documentText = textExtraction.documentText;
+ extractionResponse.documentTextPerPage = textExtraction.documentTextPerPage;
extractionResponse.assets = textExtraction.assets;
extractionResponse.files = textExtraction.files;
@@ -307,39 +309,43 @@ public DocumentAsset applyExtractPDFResponse(
document.setText(extractionResponse.documentText);
}
+ int pageNum = 0;
+ for (final String page : extractionResponse.documentTextPerPage) {
+ final ExtractedDocumentPage p = new ExtractedDocumentPage();
+ p.setText(page);
+ p.setPageNumber(pageNum);
+
+ document.getExtractions().add(p);
+ pageNum++;
+ }
+
if (extractionResponse.documentAbstract != null) {
document.setDocumentAbstract(extractionResponse.documentAbstract);
}
- if (extractionResponse.variableAttributes != null) {
- if (document.getMetadata() == null) {
- document.setMetadata(new HashMap<>());
- }
- document.getMetadata().put("attributes", extractionResponse.variableAttributes);
+ if (document.getMetadata() == null) {
+ document.setMetadata(new HashMap<>());
}
- if (extractionResponse.gollmCard != null) {
- if (document.getMetadata() == null) {
- document.setMetadata(new HashMap<>());
- }
- document.getMetadata().put("gollmCard", extractionResponse.gollmCard);
+ if (extractionResponse.variableAttributes != null) {
+ document.getMetadata().put("attributes", extractionResponse.variableAttributes);
}
+ pageNum = 0;
if (extractionResponse.equations != null) {
- if (document.getMetadata() == null) {
- document.setMetadata(new HashMap<>());
- }
document.getMetadata().put("equations", objectMapper.valueToTree(extractionResponse.equations));
+
+ document.getExtractions().get(pageNum).setEquations(extractionResponse.equations);
+ pageNum++;
}
+ pageNum = 0;
if (extractionResponse.tables != null) {
- if (document.getMetadata() == null) {
- document.setMetadata(new HashMap<>());
- }
document.getMetadata().put("tables", objectMapper.valueToTree(extractionResponse.tables));
- }
- log.info("Added extraction to document: {}", documentId);
+			document.getExtractions().get(pageNum).setTables(extractionResponse.tables);
+ pageNum++;
+ }
return documentService.updateAsset(document, projectId, hasWritePermission).orElseThrow();
}
@@ -832,6 +838,7 @@ static class TextExtraction {
String documentAbstract;
String documentText;
+ List documentTextPerPage = new ArrayList<>();
List assets = new ArrayList<>();
List files = new ArrayList<>();
}
@@ -861,7 +868,11 @@ public Future extractTextFromPDF(
final TextExtraction extraction = new TextExtraction();
- extraction.documentText = output.getResponse().asText();
+ extraction.documentText = "";
+ for (final JsonNode page : output.getResponse()) {
+ extraction.documentText += page.asText() + "\n";
+ extraction.documentTextPerPage.add(page.asText());
+ }
return extraction;
});
@@ -909,7 +920,18 @@ public Future extractTablesFromPDF(
final TableExtraction extraction = new TableExtraction();
for (final String key : keys) {
- extraction.tables.add(output.getResponse().get(key));
+ final JsonNode page = output.getResponse().get(key);
+ if (page.isArray()) {
+ final ArrayNode pageOfTables = objectMapper.createArrayNode();
+ for (final JsonNode table : page) {
+ JsonNode t = table;
+ if (table.isTextual()) {
+ t = objectMapper.readTree(table.asText());
+ }
+ pageOfTables.add(t);
+ }
+ extraction.tables.add(pageOfTables);
+ }
}
return extraction;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/gollm/EmbeddingService.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/gollm/EmbeddingService.java
index 1ee57d52a3..5997c0aec5 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/gollm/EmbeddingService.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/gollm/EmbeddingService.java
@@ -48,7 +48,7 @@ private static class EmbeddingsResponse {
}
public TerariumAssetEmbeddings generateEmbeddings(final String input)
- throws JsonProcessingException, TimeoutException, InterruptedException, ExecutionException, IOException {
+ throws TimeoutException, InterruptedException, ExecutionException, IOException {
// create the embedding search request
final GoLLMSearchRequest embeddingRequest = new GoLLMSearchRequest();
embeddingRequest.setText(input);
@@ -58,7 +58,7 @@ public TerariumAssetEmbeddings generateEmbeddings(final String input)
req.setTimeoutMinutes(REQUEST_TIMEOUT_MINUTES);
req.setType(TaskType.GOLLM);
req.setInput(embeddingRequest);
- req.setScript("gollm_task:embedding");
+ req.setScript("gollm:embedding");
try {
req.setUserId(currentUserService.get().getId());
} catch (Exception e) {
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/CompareModelsResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/CompareModelsResponseHandler.java
index 0949be4145..38def28457 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/CompareModelsResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/CompareModelsResponseHandler.java
@@ -11,7 +11,7 @@
@RequiredArgsConstructor
public class CompareModelsResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:compare_models";
+ public static final String NAME = "gollm:compare_models";
@Override
public String getName() {
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDatasetResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDatasetResponseHandler.java
index f36c5acde5..8a5fabf15e 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDatasetResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDatasetResponseHandler.java
@@ -26,7 +26,7 @@
@Slf4j
public class ConfigureModelFromDatasetResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:configure_model_from_dataset";
+ public static final String NAME = "gollm:configure_model_from_dataset";
private final ObjectMapper objectMapper;
private final ModelConfigurationService modelConfigurationService;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDocumentResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDocumentResponseHandler.java
index 3851b52c1b..0d737044b2 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDocumentResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ConfigureModelFromDocumentResponseHandler.java
@@ -25,7 +25,7 @@
@Slf4j
public class ConfigureModelFromDocumentResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:configure_model_from_document";
+ public static final String NAME = "gollm:configure_model_from_document";
private final ObjectMapper objectMapper;
private final ModelConfigurationService modelConfigurationService;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EnrichAmrResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EnrichAmrResponseHandler.java
index 2961d2e9c3..9846695ca1 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EnrichAmrResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EnrichAmrResponseHandler.java
@@ -17,7 +17,7 @@
@Slf4j
public class EnrichAmrResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:enrich_amr";
+ public static final String NAME = "gollm:enrich_amr";
private final ObjectMapper objectMapper;
private final ModelService modelService;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EquationsFromImageResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EquationsFromImageResponseHandler.java
index 8491e1f288..7b3af89ea8 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EquationsFromImageResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/EquationsFromImageResponseHandler.java
@@ -24,7 +24,7 @@
@Slf4j
public class EquationsFromImageResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:equations_from_image";
+ public static final String NAME = "gollm:equations_from_image";
private final ObjectMapper objectMapper;
private final DocumentAssetService documentService;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateResponseHandler.java
index 32de755b9f..b6b581e152 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateResponseHandler.java
@@ -1,7 +1,6 @@
package software.uncharted.terarium.hmiserver.service.tasks;
import com.fasterxml.jackson.databind.JsonNode;
-import java.util.UUID;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@@ -12,7 +11,7 @@
@Slf4j
public class GenerateResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:generate_response";
+ public static final String NAME = "gollm:generate_response";
@Override
public String getName() {
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateSummaryHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateSummaryHandler.java
index efcbe3eb68..8631153f12 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateSummaryHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/GenerateSummaryHandler.java
@@ -15,7 +15,7 @@
@Slf4j
public class GenerateSummaryHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:generate_summary";
+ public static final String NAME = "gollm:generate_summary";
private final SummaryService summaryService;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/InterventionsFromDocumentResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/InterventionsFromDocumentResponseHandler.java
index d24a440fb3..934e501fc8 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/InterventionsFromDocumentResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/InterventionsFromDocumentResponseHandler.java
@@ -22,7 +22,7 @@
@Slf4j
public class InterventionsFromDocumentResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:interventions_from_document";
+ public static final String NAME = "gollm:interventions_from_document";
private final ObjectMapper objectMapper;
private final InterventionService interventionService;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ModelCardResponseHandler.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ModelCardResponseHandler.java
index da40ba6c58..953c1279ba 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ModelCardResponseHandler.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/service/tasks/ModelCardResponseHandler.java
@@ -22,7 +22,7 @@
@Slf4j
public class ModelCardResponseHandler extends TaskResponseHandler {
- public static final String NAME = "gollm_task:model_card";
+ public static final String NAME = "gollm:model_card";
private final ObjectMapper objectMapper;
private final DocumentAssetService documentAssetService;
private final ModelService modelService;
diff --git a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/utils/JsonToHTML.java b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/utils/JsonToHTML.java
index 034f75f242..5d07ed355a 100644
--- a/packages/server/src/main/java/software/uncharted/terarium/hmiserver/utils/JsonToHTML.java
+++ b/packages/server/src/main/java/software/uncharted/terarium/hmiserver/utils/JsonToHTML.java
@@ -8,7 +8,7 @@ public class JsonToHTML {
// Function to recursively render JsonNode object into HTML
public static String renderJsonToHTML(JsonNode jsonNode) {
StringBuilder html = new StringBuilder();
- renderObject(jsonNode, html, 0);
+ renderObject(jsonNode, html, 2);
return html.toString();
}
@@ -25,7 +25,7 @@ private static void renderObject(JsonNode jsonNode, StringBuilder html, int leve
.append("")
- .append(capitalizeFirstLetter(fieldName))
+ .append(formatTitle(fieldName))
.append("\n");
@@ -57,10 +57,18 @@ private static void renderArray(JsonNode arrayNode, StringBuilder html, int leve
}
// Helper to capitalize the first letter of a string
- private static String capitalizeFirstLetter(String input) {
+ private static String formatTitle(String input) {
if (input == null || input.isEmpty()) {
return input;
}
- return input.substring(0, 1).toUpperCase() + input.substring(1);
+
+ // Split the string into words and add a space between each word
+ String[] words = input.split("(?=[A-Z])");
+ StringBuilder formatted = new StringBuilder();
+ for (String word : words) {
+ formatted.append(word).append(" ");
+ }
+ final String title = formatted.toString().trim();
+ return title.substring(0, 1).toUpperCase() + title.substring(1);
}
}
diff --git a/packages/server/src/test/java/software/uncharted/terarium/hmiserver/models/TerariumAssetTests.java b/packages/server/src/test/java/software/uncharted/terarium/hmiserver/models/TerariumAssetTests.java
index 08cc446ff5..7f1d615be4 100644
--- a/packages/server/src/test/java/software/uncharted/terarium/hmiserver/models/TerariumAssetTests.java
+++ b/packages/server/src/test/java/software/uncharted/terarium/hmiserver/models/TerariumAssetTests.java
@@ -7,6 +7,7 @@
import software.uncharted.terarium.hmiserver.TerariumApplicationTests;
import software.uncharted.terarium.hmiserver.models.dataservice.model.Model;
import software.uncharted.terarium.hmiserver.models.dataservice.modelparts.ModelHeader;
+import software.uncharted.terarium.hmiserver.models.dataservice.modelparts.ModelMetadata;
@Slf4j
public class TerariumAssetTests extends TerariumApplicationTests {
@@ -32,7 +33,7 @@ void testSerializeWithoutTerariumFields() throws Exception {
.setSchemaName("petrinet")
);
- final String json1 = model1.serializeWithoutTerariumFields();
+ final String json1 = model1.serializeWithoutTerariumFields(null, null);
final Model model2 = new Model();
model2.setName("myname");
@@ -53,10 +54,54 @@ void testSerializeWithoutTerariumFields() throws Exception {
.setSchemaName("petrinet")
);
- final String json2 = model2.serializeWithoutTerariumFields();
+ final String json2 = model2.serializeWithoutTerariumFields(null, null);
log.info("json1: {}", json1);
Assertions.assertEquals(json1, json2);
}
+
+ @Test
+ void testCustomizeSerializeWithoutTerariumFields() throws Exception {
+ final Model model1 = new Model();
+ model1.setName("myname");
+ model1.setDescription("mydescription");
+ model1.setFileNames(null);
+ model1.setDeletedOn(null);
+ model1.setTemporary(false);
+ model1.setPublicAsset(false);
+ model1.setUserId("my user");
+ model1.setCreatedOn(new Timestamp(System.currentTimeMillis()));
+ model1.setUpdatedOn(new Timestamp(System.currentTimeMillis()));
+ model1.setHeader(
+ new ModelHeader()
+ .setName("test-name")
+ .setModelSchema("test-schema")
+ .setModelVersion("0.1.2")
+ .setDescription("test-description")
+ .setSchemaName("petrinet")
+ );
+
+ final String json1 = model1.serializeWithoutTerariumFields(new String[] { "name", "description" }, null);
+
+ // Assert that json1 still contains the name and description fields
+
+ Assertions.assertTrue(json1.contains("\"name\":\"myname\""));
+ Assertions.assertTrue(json1.contains("\"description\":\"mydescription\""));
+
+ // Add an additional field to the model
+ final ModelMetadata metadata = new ModelMetadata();
+ metadata.setCodeId("code_id");
+ model1.setMetadata(metadata);
+
+ final String json2 = model1.serializeWithoutTerariumFields(null, null);
+
+ // Assert that json2 contains the codeId field
+ Assertions.assertTrue(json2.contains("\"code_id\":\"code_id\""));
+
+ final String json3 = model1.serializeWithoutTerariumFields(null, new String[] { "code_id" });
+
+ // Assert that json3 does not contain the codeId field
+ Assertions.assertFalse(json3.contains("\"code_id\":\"code_id\""));
+ }
}
diff --git a/packages/server/src/test/java/software/uncharted/terarium/hmiserver/service/tasks/TaskServiceTest.java b/packages/server/src/test/java/software/uncharted/terarium/hmiserver/service/tasks/TaskServiceTest.java
index b83caa8e3e..1c8f874127 100644
--- a/packages/server/src/test/java/software/uncharted/terarium/hmiserver/service/tasks/TaskServiceTest.java
+++ b/packages/server/src/test/java/software/uncharted/terarium/hmiserver/service/tasks/TaskServiceTest.java
@@ -108,7 +108,7 @@ public void testItCanSendGoLLMModelCardRequest() throws Exception {
final TaskRequest req = new TaskRequest();
req.setType(TaskType.GOLLM);
- req.setScript("gollm_task:model_card");
+ req.setScript("gollm:model_card");
req.setInput(content.getBytes());
final TaskResponse resp = taskService.runTaskSync(req);
@@ -131,7 +131,7 @@ public void testItCanSendGoLLMEnrichAMRRequest() throws Exception {
final TaskRequest req = new TaskRequest();
req.setType(TaskType.GOLLM);
- req.setScript("gollm_task:enrich_amr");
+ req.setScript("gollm:enrich_amr");
req.setInput(input);
final TaskResponse resp = taskService.runTaskSync(req);
@@ -150,7 +150,7 @@ static class AdditionalProps {
public void testItCanSendGoLLMEmbeddingRequest() throws Exception {
final TaskRequest req = new TaskRequest();
req.setType(TaskType.GOLLM);
- req.setScript("gollm_task:embedding");
+ req.setScript("gollm:embedding");
req.setInput(
("{\"text\":\"What kind of dinosaur is the coolest?\",\"embedding_model\":\"text-embedding-ada-002\"}").getBytes()
);
@@ -302,7 +302,7 @@ public void testItCanSendGoLLMConfigFromDatasetRequest() throws Exception {
final TaskRequest req = new TaskRequest();
req.setType(TaskType.GOLLM);
- req.setScript("gollm_task:configure_model_from_dataset");
+ req.setScript("gollm:configure_model_from_dataset");
req.setInput(content.getBytes());
final TaskResponse resp = taskService.runTaskSync(req);
@@ -329,7 +329,7 @@ public void testItCanSendGoLLMInterventionsFromDocumentRequest() throws Exceptio
final TaskRequest req = new TaskRequest();
req.setType(TaskType.GOLLM);
- req.setScript("gollm_task:interventions_from_document");
+ req.setScript("gollm:interventions_from_document");
req.setInput(input);
final TaskResponse resp = taskService.runTaskSync(req);
diff --git a/packages/taskrunner/gradle/wrapper/gradle-wrapper.properties b/packages/taskrunner/gradle/wrapper/gradle-wrapper.properties
index 9355b41557..df97d72b8b 100644
--- a/packages/taskrunner/gradle/wrapper/gradle-wrapper.properties
+++ b/packages/taskrunner/gradle/wrapper/gradle-wrapper.properties
@@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
diff --git a/packages/text_extraction/tasks/extract_text.py b/packages/text_extraction/tasks/extract_text.py
index c4b0eb3281..3444f0f6e6 100644
--- a/packages/text_extraction/tasks/extract_text.py
+++ b/packages/text_extraction/tasks/extract_text.py
@@ -28,8 +28,8 @@ def delete_temp_file(name):
def extract_text_from_pdf(filename):
reader = PyPDF2.PdfReader(filename)
- text = ""
+ text = []
for page in reader.pages:
- text += page.extract_text()
+ text.append(page.extract_text())
return text
diff --git a/testing/manual/compare-models.md b/testing/manual/compare-models.md
index ace4f5bd53..bd35121b29 100644
--- a/testing/manual/compare-models.md
+++ b/testing/manual/compare-models.md
@@ -29,40 +29,38 @@ Report any issues into GitHub: [open an issue](https://github.com/DARPA-ASKEM/te
- The overview begins to generate
- The model graphs are displayed
7. When the overview is generated, it should compare the structure of the AMR models.
- - It should have an overview of the models
- - It should look at the semantic information of all the models
- - It should state that there is a lack of metadata information to make other comparisons
- - It should have a meaningful conclusion
+ - It should have a title that states it is comparing all 3 models
+ - It should have a summary of the comparison
+ - It should only compare the structure of the models
### 3. Compare models with partial model cards
1. Enrich the Bertozzi2020 and Fang2020 models with their respective documents
-2. Create a new "Compare models" operator
-4. Attach the Bertozzi2020, Fang2020, and Tang2020 models to the new "Compare models" operator
-5. Open the drilldown
-6. Ensure that when the drill-down starts:
+2. Re-open the "Compare models" operator
+3. Open the drilldown
+4. Ensure that when the drill-down starts:
- The overview begins to generate
- The model graphs are displayed
- Model card information is displayed for Bertozzi2020 and Fang2020
-7. When the overview is generated, it should compare both the structure of the AMR models and the metadata information.
- - It should have an overview of the models
- - It should look at semantic information of all the models
- - It should compare the metadata information for the Bertozzi2020 and Fang2020 models
- - It should have a meaningful conclusion
+5. When the overview is generated, it should compare both the structure of the AMR models and the metadata information.
+ - It should have a title that states it is comparing all 3 models
+ - It should have a summary of the comparison
+ - It should compare the structure of ALL the models
+ - It should compare the metadata information for Bertozzi2020 and Fang2020 only
### 4. Compare models all with model cards
1. Finally, enrich the Tang2020 models with its respective document
-2. Create a new "Compare models" operator
-4. Attach the Bertozzi2020, Fang2020, and Tang2020 models to the new "Compare models" operator
-5. Open the drilldown
-6. Ensure that when the drill-down starts:
- - The overview begins to generate
- - The model graphs are displayed
- - Model card information is displayed for all models
-7. When the overview is generated, it should only compare the metadata information from each model and not look at the structure.
- - It should have an overview of the models
- - It should compare the metadata and semantic information for all models
- - It should have a meaningful conclusion
+2. Re-open the "Compare models" operator
+3. Open the drilldown
+4. Ensure that when the drill-down starts:
+ - The overview begins to generate
+ - The model graphs are displayed
+ - Model card information is displayed for all models
+5. When the overview is generated, it should only compare the metadata information from each model and not look at the structure.
+ - It should have a title that states it is comparing all 3 models
+ - It should have a summary of the comparison
+ - It should not compare the structure of the models
+ - It should compare the metadata information for ALL the models
### 5. Working with the Compare models drilldown: NOTEBOOK
1. Switch to the **Notebook** tab.