diff --git a/pyproject.toml b/pyproject.toml
index 8402fe7ff32..4a50b4bbf44 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,8 +84,11 @@ core = ["skema[img2mml]", "skema[isa]", "skema[tr]", "skema[metal]"]
 # see skema/img2mml/render_mml/mathpix_annotator
 annotations = ["matplotlib", "notebook"]
 
+# for LLM use in skema
+llms = ["langchain==0.0.325"]
+
 # all extras
-all = ["skema[core]", "skema[dev]", "skema[doc]", "skema[demo]", "skema[annotations]"]
+all = ["skema[core]", "skema[dev]", "skema[doc]", "skema[demo]", "skema[annotations]", "skema[llms]"]
 
 [tool.setuptools.package-dir]
 "skema.gromet" = "skema/gromet"
diff --git a/skema/rest/api.py b/skema/rest/api.py
index c90080417a3..435bcc56fec 100644
--- a/skema/rest/api.py
+++ b/skema/rest/api.py
@@ -7,6 +7,7 @@
     integrated_text_reading_proxy,
     morae_proxy,
     metal_proxy,
+    llm_proxy,
 )
 from skema.img2mml import eqn2mml
 from skema.skema_py import server as code2fn
@@ -110,6 +111,12 @@
     tags=["morae", "skema-rs"],
 )
 
+app.include_router(
+    llm_proxy.router,
+    prefix="/morae",
+    tags=["morae"],
+)
+
 app.include_router(
     integrated_text_reading_proxy.router,
     prefix="/text-reading",
diff --git a/skema/rest/llm_proxy.py b/skema/rest/llm_proxy.py
new file mode 100644
index 00000000000..8a3b8d015c8
--- /dev/null
+++ b/skema/rest/llm_proxy.py
@@ -0,0 +1,147 @@
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+from langchain.output_parsers import (
+    StructuredOutputParser,
+    ResponseSchema
+)
+from fastapi import APIRouter, FastAPI, File, UploadFile
+from io import BytesIO
+from zipfile import ZipFile
+import requests
+from pathlib import Path
+from pydantic import BaseModel, Field
+from typing import List, Optional
+from skema.rest.proxies import SKEMA_OPENAI_KEY
+
+router = APIRouter()
+
+class Dynamics(BaseModel):
+    """
+    Dynamics Data Model for capturing dynamics within a CodeFile.
+    """
+
+    name: Optional[str] = Field(description="Name of the dynamics section.")
+    description: Optional[str] = Field(description="Description of the dynamics.")
+    block: List[str] = Field(
+        description="A list containing strings indicating the line numbers in the file that contain the dynamics, e.g., ['L205-L213', 'L225-L230']."
+    )
+
+@router.post(
+    "/linespan-given-filepaths-zip",
+    summary=(
+        "Send a zip file containing a code file,"
+        " get a line span of the dynamics back."
+    ),
+)
+async def get_lines_of_model(zip_file: UploadFile = File()) -> Dynamics:
+    """
+    Endpoint for generating a line span containing the dynamics from a zip archive. Currently
+    it only expects there to be one Python file in the zip. There can be other files, such as a
+    README.md, but only one .py. Future versions will generalize support to arbitrary zip contents.
+
+    ### Python example
+    ```
+    import requests
+
+    files = {
+      "zip_file": open(zip_path, "rb"),
+    }
+
+    response = requests.post(f"{ENDPOINT}/morae/linespan-given-filepaths-zip", files=files)
+    linespan = response.json()
+    ```
+    """
+    files = []
+    blobs = []
+    block = []
+    with ZipFile(BytesIO(zip_file.file.read()), "r") as zip:
+        for file in zip.namelist():
+            file_obj = Path(file)
+            if file_obj.suffix in [".py"]:
+                files.append(file)
+                blobs.append(zip.open(file).read())
+
+    # read in the code, for the prompt
+    code = blobs[0].decode("utf-8")  # needs to be a regular string, not a byte string
+    file = files[0]
+    # json payload for the FN construction
+    single_snippet_payload = {
+        "files": [file],
+        "blobs": [code],
+    }
+
+    # these are the formatting instructions
+    response_schemas = [
+        ResponseSchema(name="model_function", description="The name of the function that contains the model dynamics")
+    ]
+
+    # for structured output parsing, converts the schema to a langchain object
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+
+    # for structured output parsing, creates the instructions to be passed as a variable to the prompt template
+    format_instructions = output_parser.get_format_instructions()
+
+    # low temperature since this is not a generative task
+    temperature = 0.1
+
+    # initialize the model
+    openai = ChatOpenAI(
+        temperature=temperature,
+        model_name='gpt-3.5-turbo',
+        openai_api_key=SKEMA_OPENAI_KEY
+    )
+
+    # construct the prompts
+    template = "You are an assistant that answers questions about code."
+    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+    human_template = "Find the function that contains the model dynamics in {code} \n{format_instructions}"
+    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+    # combine the templates into a chat template
+    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
+
+    # format the prompt with the input variables
+    formatted_prompt = chat_prompt.format_prompt(code=code, format_instructions=format_instructions).to_messages()
+
+    # run the model
+    output = openai(formatted_prompt)
+
+    # parse the output
+    try:
+        parsed_output = output_parser.parse(output.content)
+
+        function_name = parsed_output['model_function']
+
+        # get the FN from it
+        url = "https://api.askem.lum.ai/code2fn/fn-given-filepaths"
+        response_zip = requests.post(url, json=single_snippet_payload)
+
+        # get the metadata entry for the function
+        for entry in response_zip.json()['modules'][0]['fn_array']:
+            try:
+                if entry['b'][0]['name'][0:len(function_name)] == function_name:
+                    metadata_idx = entry['b'][0]['metadata']
+            except:
+                pass
+
+        # get the line span using the metadata
+        for (i, metadata) in enumerate(response_zip.json()['modules'][0]['metadata_collection']):
+            if i == (metadata_idx - 1):
+                line_begin = metadata[0]['line_begin']
+                line_end = metadata[0]['line_end']
+    except:
+        print("Failed to parse dynamics")
+        line_begin = 0
+        line_end = 0
+
+    block.append(f"L{line_begin}-L{line_end}")
+
+    output = Dynamics(name=None, description=None, block=block)
+    return output
+
+
+app = FastAPI()
+app.include_router(router)
\ No newline at end of file
diff --git a/skema/rest/proxies.py b/skema/rest/proxies.py
index 502a355b6c9..2a61fcefd25 100644
--- a/skema/rest/proxies.py
+++ b/skema/rest/proxies.py
@@ -7,7 +7,7 @@
 # MORAE etc
 SKEMA_RS_ADDESS = os.environ.get("SKEMA_RS_ADDRESS", "https://skema-rs.askem.lum.ai")
-
+SKEMA_OPENAI_KEY = os.environ.get("SKEMA_OPENAI_KEY", "YOU_FORGOT_TO_SET_SKEMA_OPENAI_KEY")
 # MathJAX service
 SKEMA_MATHJAX_PROTOCOL = os.environ.get("SKEMA_MATHJAX_PROTOCOL", "http://")
@@ -24,4 +24,3 @@
 SKEMA_TR_ADDRESS = os.environ.get("SKEMA_TR_ADDRESS", "http://hopper.sista.arizona.edu")
 OPENAI_KEY = os.environ.get("OPENAI_KEY", "YOU_FORGOT_TO_SET_OPENAI_KEY")
 COSMOS_ADDRESS = os.environ.get("COSMOS_ADDRESS", "https://xdd.wisc.edu/cosmos_service")
-
diff --git a/skema/rest/tests/test_llms.py b/skema/rest/tests/test_llms.py
new file mode 100644
index 00000000000..b95691db217
--- /dev/null
+++ b/skema/rest/tests/test_llms.py
@@ -0,0 +1,51 @@
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+from langchain.output_parsers import (
+    StructuredOutputParser,
+    ResponseSchema
+)
+import langchain.schema
+from skema.rest.proxies import SKEMA_OPENAI_KEY
+
+def test_prompt_construction():
+    """Tests prompt template instantiation"""
+    # TODO: your assertion here that the template instantiation returns a string/valid type
+
+    code = "def sir(\n s: float, i: float, r: float, beta: float, gamma: float, n: float\n) -> Tuple[float, float, float]:\n \"\"\"The SIR model, one time step.\"\"\"\n s_n = (-beta * s * i) + s\n i_n = (beta * s * i - gamma * i) + i\n r_n = gamma * i + r\n scale = n / (s_n + i_n + r_n)\n return s_n * scale, i_n * scale, r_n * scale"
+
+    # these are the formatting instructions
+    response_schemas = [
+        ResponseSchema(name="model_function", description="The name of the function that contains the model dynamics")
+    ]
+
+    # for structured output parsing, converts the schema to a langchain object
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+
+    # for structured output parsing, creates the instructions to be passed as a variable to the prompt template
+    format_instructions = output_parser.get_format_instructions()
+
+    # construct the prompts
+    template = "You are an assistant that answers questions about code."
+    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+    human_template = "Find the function that contains the model dynamics in {code} \n{format_instructions}"
+    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+    # combine the templates into a chat template
+    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
+
+    # format the prompt with the input variables
+    formatted_prompt = chat_prompt.format_prompt(code=code, format_instructions=format_instructions).to_messages()
+
+    # mocks the output from the model
+    output_mock = langchain.schema.messages.AIMessage(content='```json\n{\n\t"model_function": "sir"\n}\n```', additional_kwargs={}, example=False)
+
+    parsed_output = output_parser.parse(output_mock.content)
+
+    assert isinstance(parsed_output['model_function'], str)
+    assert isinstance(formatted_prompt[0], langchain.schema.messages.SystemMessage)
+    assert isinstance(formatted_prompt[1], langchain.schema.messages.HumanMessage)
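
For reference, a minimal client-side sketch (not part of the patch) of how the new `/morae/linespan-given-filepaths-zip` route can be exercised end to end. Only the route and the `zip_file` form field come from the code above; the host, file name, and zip layout are illustrative assumptions.

```
# Hypothetical usage sketch: zip one Python file in memory and POST it to the
# linespan endpoint added in skema/rest/llm_proxy.py.
import io
import zipfile

import requests

ENDPOINT = "http://localhost:8000"  # assumed host; point at the deployed SKEMA REST API
SOURCE_FILE = "model.py"            # assumed path to the file containing the dynamics

# build an in-memory zip archive containing the single .py file
buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w") as archive:
    archive.write(SOURCE_FILE)
buffer.seek(0)

response = requests.post(
    f"{ENDPOINT}/morae/linespan-given-filepaths-zip",
    files={"zip_file": ("model.zip", buffer, "application/zip")},
)
# the endpoint returns a Dynamics payload, e.g.
# {"name": null, "description": null, "block": ["L10-L20"]}
print(response.json())
```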