ml4ai · myedibleenso · Oct 31, 2023 · Oct 27, 2023 · Oct 27, 2023 · Oct 27, 2023
diff --git a/pyproject.toml b/pyproject.toml
@@ -84,8 +84,11 @@ core = ["skema[img2mml]", "skema[isa]", "skema[tr]", "skema[metal]"]
 # see skema/img2mml/render_mml/mathpix_annotator
 annotations = ["matplotlib", "notebook"]
 
+# for llm use in skema
+llms = ["langchain==0.0.325"]
+
 # all extras
-all = ["skema[core]", "skema[dev]", "skema[doc]", "skema[demo]", "skema[annotations]"]
+all = ["skema[core]", "skema[dev]", "skema[doc]", "skema[demo]", "skema[annotations]", "skema[llms]"]
 
 [tool.setuptools.package-dir]
 "skema.gromet" = "skema/gromet"

diff --git a/skema/rest/api.py b/skema/rest/api.py
@@ -7,6 +7,7 @@
     integrated_text_reading_proxy,
     morae_proxy,
     metal_proxy,
+    llm_proxy,
 )
 from skema.img2mml import eqn2mml
 from skema.skema_py import server as code2fn
@@ -110,6 +111,12 @@
     tags=["morae", "skema-rs"],
 )
 
+app.include_router(
+    llm_proxy.router,
+    prefix="/morae",
+    tags=["morae"],
+)
+
 app.include_router(
     integrated_text_reading_proxy.router,
     prefix="/text-reading",

diff --git a/skema/rest/llm_proxy.py b/skema/rest/llm_proxy.py
@@ -0,0 +1,137 @@
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+from langchain.output_parsers import (
+    StructuredOutputParser,
+    ResponseSchema
+)
+from fastapi import APIRouter, FastAPI, File, UploadFile
+from io import BytesIO
+from zipfile import ZipFile
+import requests
+from pathlib import Path
+from pydantic import BaseModel
+from skema.rest.proxies import SKEMA_OPENAI_KEY
+
+router = APIRouter()
+
+class LineSpan(BaseModel):
+    line_begin: int
+    line_end: int
+
+
+@router.post(
+    "/linespan-given-filepaths-zip",
+    summary=(
+        "Send a zip file containing a code file,"
+        " get a line span of the dynamics back."
+    ),
+)
+async def get_lines_of_model(zip_file: UploadFile = File()) -> LineSpan:
+    """
+    Endpoint for generating a line span containing the dynamics from a zip archive. Currently
+    it only expects there to be one python file in the zip. There can be other files, such as a
+    README.md, but only one .py. Future versions will generalize support to arbritary zip contents. 
+
+    ### Python example
+    ```
+    import requests
+
+    files = {
+      "zip_file": open(zip_path, "rb"),
+    }
+
+    response = requests.post(f"{ENDPOINT}/morae/linespan-given-filepaths-zip", files=files)
+    gromet_json = response.json()
+    """
+    files=[]
+    blobs=[]
+    with ZipFile(BytesIO(zip_file.file.read()), "r") as zip:
+        for file in zip.namelist():
+            file_obj = Path(file)
+            if file_obj.suffix in [".py"]:
+                files.append(file)
+                blobs.append(zip.open(file).read())
+
+    # read in the code, for the prompt
+    code = blobs[0].decode("utf-8") # needs to be regular string, not byte string
+    file = files[0]
+    # json for the fn construction
+    single_snippet_payload = {
+            "files": [file],
+            "blobs": [code],
+        }
+
+    # this is the formatting instructions
+    response_schemas = [
+        ResponseSchema(name="model_function", description="The name of the function that contains the model dynamics")
+    ]
+
+    # for structured output parsing, converts schema to langhchain object
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+
+    # for structured output parsing, makes the instructions to be passed as a variable to prompt template
+    format_instructions = output_parser.get_format_instructions()
+
+    # low temp as is not generative
+    temperature = 0.1
+
+    # initialize the models
+    openai = ChatOpenAI(
+        temperature=temperature,
+        model_name='gpt-3.5-turbo',
+        openai_api_key=SKEMA_OPENAI_KEY
+    )
+
+    # construct the prompts
+    template="You are a assistant that answers questions about code."
+    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+    human_template="Find the function that contains the model dynamics in {code} \n{format_instructions}"
+    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+    # combining the templates for a chat template
+    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
+
+    # formatting the prompt with input variables
+    formatted_prompt = chat_prompt.format_prompt(code=code, format_instructions = format_instructions).to_messages()
+
+    # running the model
+    output = openai(formatted_prompt)
+
+    # parsing the output
+    try:
+        parsed_output = output_parser.parse(output.content)
+
+        function_name = parsed_output['model_function']
+
+        # Get the FN from it
+        url = "https://api.askem.lum.ai/code2fn/fn-given-filepaths"
+        response_zip = requests.post(url, json=single_snippet_payload)
+
+        # get metadata entry for function
+        for entry in response_zip.json()['modules'][0]['fn_array']:
+            try:
+                if entry['b'][0]['name'][0:len(function_name)] == function_name:
+                    metadata_idx = entry['b'][0]['metadata']
+            except:
+                None
+
+        # get line span using metadata
+        for (i,metadata) in enumerate(response_zip.json()['modules'][0]['metadata_collection']):
+            if i == (metadata_idx - 1):
+                line_begin = metadata[0]['line_begin']
+                line_end =  metadata[0]['line_end']
+    except:
+        print("Failed to parse dynamics")
+        line_begin = 0
+        line_end = 0
+
+    output = LineSpan(line_begin=line_begin,line_end=line_end)
+    return output
+
+
+app = FastAPI()
+app.include_router(router)
diff --git a/skema/rest/proxies.py b/skema/rest/proxies.py
@@ -7,7 +7,7 @@
 
 # MORAE etc
 SKEMA_RS_ADDESS = os.environ.get("SKEMA_RS_ADDRESS", "https://skema-rs.askem.lum.ai")
-
+SKEMA_OPENAI_KEY = os.environ.get("SKEMA_OPENAI_KEY", "YOU_FORGOT_TO_SET_SKEMA_OPENAI_KEY")
 
 # MathJAX service
 SKEMA_MATHJAX_PROTOCOL = os.environ.get("SKEMA_MATHJAX_PROTOCOL", "http://")
@@ -24,4 +24,3 @@
 SKEMA_TR_ADDRESS = os.environ.get("SKEMA_TR_ADDRESS", "http://hopper.sista.arizona.edu")
 OPENAI_KEY = os.environ.get("OPENAI_KEY", "YOU_FORGOT_TO_SET_OPENAI_KEY")
 COSMOS_ADDRESS = os.environ.get("COSMOS_ADDRESS",  "https://xdd.wisc.edu/cosmos_service")
-