From 0f3345748c2820be63dfde433c7d9270ac308ea6 Mon Sep 17 00:00:00 2001 From: rachhek Date: Wed, 11 Dec 2024 14:44:06 +0200 Subject: [PATCH] add new use case for research assistant --- .../research_assistant_ci_dev_workflow.yml | 42 +++++++++++ .../research_assistant_pr_dev_workflow.yml | 36 +++++++++ math_coding/experiment.yaml | 2 +- .../flows/math_standard_flow/flow.dag.yaml | 2 - math_coding/tests/test_env.py | 45 +++++++++++ math_coding/tests/test_math_evaluation.py | 74 +++++++++++++++++++ .../configs/deployment_config.json | 56 ++++++++++++++ .../data/eval_data.json | 1 + .../data/sample_data.json | 1 + .../environment/Dockerfile | 32 ++++++++ use_case_research_assistant/experiment.yaml | 30 ++++++++ .../flows/evaluation/flow.dag.yaml | 25 +++++++ .../flows/evaluation/requirements.txt | 3 + .../flows/evaluation/search_question.py | 0 .../flows/standard/QUESTION_EXPANDER.jinja2 | 6 ++ .../flows/standard/flow.dag.yaml | 32 ++++++++ .../flows/standard/requirements.txt | 3 + .../flows/standard/search_question.py | 9 +++ .../sample-request.json | 3 + .../tests/test_questions.py | 6 ++ 20 files changed, 405 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/research_assistant_ci_dev_workflow.yml create mode 100644 .github/workflows/research_assistant_pr_dev_workflow.yml create mode 100644 math_coding/tests/test_env.py create mode 100644 math_coding/tests/test_math_evaluation.py create mode 100644 use_case_research_assistant/configs/deployment_config.json create mode 100644 use_case_research_assistant/data/eval_data.json create mode 100644 use_case_research_assistant/data/sample_data.json create mode 100644 use_case_research_assistant/environment/Dockerfile create mode 100644 use_case_research_assistant/experiment.yaml create mode 100644 use_case_research_assistant/flows/evaluation/flow.dag.yaml create mode 100644 use_case_research_assistant/flows/evaluation/requirements.txt create mode 100644 use_case_research_assistant/flows/evaluation/search_question.py create mode 100644 use_case_research_assistant/flows/standard/QUESTION_EXPANDER.jinja2 create mode 100644 use_case_research_assistant/flows/standard/flow.dag.yaml create mode 100644 use_case_research_assistant/flows/standard/requirements.txt create mode 100644 use_case_research_assistant/flows/standard/search_question.py create mode 100755 use_case_research_assistant/sample-request.json create mode 100644 use_case_research_assistant/tests/test_questions.py diff --git a/.github/workflows/research_assistant_ci_dev_workflow.yml b/.github/workflows/research_assistant_ci_dev_workflow.yml new file mode 100644 index 000000000..489c70e7f --- /dev/null +++ b/.github/workflows/research_assistant_ci_dev_workflow.yml @@ -0,0 +1,42 @@ +name: math_coding_ci_dev_workflow + +on: + workflow_call: + inputs: + env_name: + type: string + description: "Execution Environment" + required: true + default: "dev" + use_case_base_path: + type: string + description: "The flow usecase to execute" + required: true + default: "use_case_research_assistant" + deployment_type: + type: string + description: "Determine type of deployment - aml, aks, docker, webapp" + required: true + push: + branches: + - main + - development + paths: + - 'use_case_research_assistant/**' + - '.github/**' + - 'llmops/**' + +#===================================== +# Execute platform_ci_dev_workflow workflow for experiment, evaluation and deployment of flows +#===================================== +jobs: + execute-platform-flow-ci: + uses: ./.github/workflows/platform_ci_dev_workflow.yml + with: + env_name: ${{ inputs.env_name || 'dev'}} + use_case_base_path: ${{ inputs.use_case_base_path || 'use_case_research_assistant' }} + deployment_type: ${{ inputs.deployment_type|| 'aml' }} + secrets: + azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} + registry_details: ${{ secrets.DOCKER_IMAGE_REGISTRY }} + env_vars: ${{ secrets.ENV_VARS }} diff --git a/.github/workflows/research_assistant_pr_dev_workflow.yml b/.github/workflows/research_assistant_pr_dev_workflow.yml new file mode 100644 index 000000000..71791c3fc --- /dev/null +++ b/.github/workflows/research_assistant_pr_dev_workflow.yml @@ -0,0 +1,36 @@ +name: math_coding_pr_dev_workflow + +on: + workflow_call: + inputs: + env_name: + type: string + description: "Execution Environment" + required: true + default: "dev" + use_case_base_path: + type: string + description: "The flow usecase to execute" + required: true + default: "use_case_research_assistant" + pull_request: + branches: + - main + - development + paths: + - 'math_coding/**' + - '.github/**' + - 'llmops/**' + +#===================================== +# Execute platform_pr_dev_workflow workflow for experiment, evaluation and deployment of flows +#===================================== +jobs: + execute-platform-pr-workflow: + uses: ./.github/workflows/platform_pr_dev_workflow.yml + with: + env_name: ${{ inputs.env_name || 'pr'}} + use_case_base_path: ${{ inputs.flow_type || 'use_case_research_assistant' }} + secrets: + azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} + env_vars: ${{ secrets.ENV_VARS }} diff --git a/math_coding/experiment.yaml b/math_coding/experiment.yaml index fbaa5a69c..e21e999fd 100644 --- a/math_coding/experiment.yaml +++ b/math_coding/experiment.yaml @@ -6,7 +6,7 @@ connections: connection_type: AzureOpenAIConnection api_base: https://edge-10x-ai-services.cognitiveservices.azure.com api_version: 2023-07-01-preview - api_key: ${AZURE_OPENAI_API_KEY} + api_key: ${api_key} api_type: azure datasets: diff --git a/math_coding/flows/math_standard_flow/flow.dag.yaml b/math_coding/flows/math_standard_flow/flow.dag.yaml index 8cb21cd17..b7719d19f 100644 --- a/math_coding/flows/math_standard_flow/flow.dag.yaml +++ b/math_coding/flows/math_standard_flow/flow.dag.yaml @@ -39,8 +39,6 @@ nodes: type: code path: ask_llm.jinja2 inputs: - # This is to easily switch between openai and azure openai. - # deployment_name is required by azure openai, model is required by openai. deployment_name: gpt-35-turbo model: gpt-3.5-turbo question: ${inputs.math_question} diff --git a/math_coding/tests/test_env.py b/math_coding/tests/test_env.py new file mode 100644 index 000000000..c2bf8decb --- /dev/null +++ b/math_coding/tests/test_env.py @@ -0,0 +1,45 @@ +import os +from promptflow.client import PFClient + +def test_env_variables(): + # Initialize PFClient + pf = PFClient() + + # List all connections + print("\nListing all connections:") + try: + connections = pf.connections.list() + for conn in connections: + print(f"Connection name: {conn.name}") + print(f"Connection type: {conn.type}") + print(f"Connection configs: {conn.configs}") + print("---") + except Exception as e: + print("Error listing connections:", str(e)) + + print("\nChecking environment variables:") + print("AZURE_OPENAI_API_KEY:", bool(os.getenv("AZURE_OPENAI_API_KEY"))) + print("AZURE_OPENAI_ENDPOINT:", os.getenv("AZURE_OPENAI_ENDPOINT")) + + # Test connection to Azure OpenAI + from openai import AzureOpenAI + + print("\nTesting Azure OpenAI connection:") + client = AzureOpenAI( + api_key=os.getenv("AZURE_OPENAI_API_KEY"), + api_version="2023-07-01-preview", + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT") + ) + + # Try a simple completion + try: + response = client.chat.completions.create( + model="gpt-35-turbo", # Using the deployment name from your flow.dag.yaml + messages=[{"role": "user", "content": "Hello!"}] + ) + print("Connection successful!") + except Exception as e: + print("Connection failed:", str(e)) + +if __name__ == "__main__": + test_env_variables() \ No newline at end of file diff --git a/math_coding/tests/test_math_evaluation.py b/math_coding/tests/test_math_evaluation.py new file mode 100644 index 000000000..ea5e480e4 --- /dev/null +++ b/math_coding/tests/test_math_evaluation.py @@ -0,0 +1,74 @@ +from promptflow.client import PFClient + +def test_math_evaluation_flow(): + # Initialize the Promptflow client + pf = PFClient() + + # Path to your flow + flow_path = "../flows/math_evaluation_flow" + + # Test case 1: Correct prediction + result1 = pf.test(flow=flow_path, inputs={ + "groundtruth": "3.14", + "prediction": "3.14" + }) + print("\nTest 1 - Exact match:") + print(f"Score: {result1['score']}") + + # Test case 2: Close enough prediction (rounds to same value) + result2 = pf.test(flow=flow_path, inputs={ + "groundtruth": "3.14", + "prediction": "3.141592" + }) + print("\nTest 2 - Close enough:") + print(f"Score: {result2['score']}") + + # Test case 3: Wrong prediction + result3 = pf.test(flow=flow_path, inputs={ + "groundtruth": "3.14", + "prediction": "3.15" + }) + print("\nTest 3 - Wrong answer:") + print(f"Score: {result3['score']}") + + # Test case 4: Error case + result4 = pf.test(flow=flow_path, inputs={ + "groundtruth": "3.14", + "prediction": "JSONDecodeError" + }) + print("\nTest 4 - Error case:") + print(f"Score: {result4['score']}") + + # Test batch processing + test_data = [ + {"groundtruth": "1.0", "prediction": "1.0"}, + {"groundtruth": "2.0", "prediction": "2.01"}, + {"groundtruth": "3.14", "prediction": "3.14159"}, + {"groundtruth": "4.0", "prediction": "JSONDecodeError"}, + ] + + batch_result = pf.test( + flow=flow_path, + inputs=test_data, + ) + + print("\nBatch Processing Results:") + # Print all available keys in batch_result + print("Available keys:", batch_result.keys() if hasattr(batch_result, 'keys') else "Result is not a dict") + + # Try different ways to access metrics + try: + if hasattr(batch_result, 'metrics'): + print(f"Metrics (as attribute): {batch_result.metrics}") + elif isinstance(batch_result, dict) and 'metrics' in batch_result: + print(f"Metrics (as dict key): {batch_result['metrics']}") + elif isinstance(batch_result, dict) and 'output' in batch_result: + print(f"Output: {batch_result['output']}") + else: + print("Raw batch result:", batch_result) + except Exception as e: + print(f"Error accessing metrics: {str(e)}") + print("Raw batch result:", batch_result) + +if __name__ == "__main__": + test_math_evaluation_flow() \ No newline at end of file diff --git a/use_case_research_assistant/configs/deployment_config.json b/use_case_research_assistant/configs/deployment_config.json new file mode 100644 index 000000000..d0acd3348 --- /dev/null +++ b/use_case_research_assistant/configs/deployment_config.json @@ -0,0 +1,56 @@ +{ + "azure_managed_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "ENDPOINT_NAME": "research-assistant-1", + "ENDPOINT_DESC": "An online endpoint serving a flow for research assistant", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "research-assistant-1", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "research-assistant-1", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": "100", + "DEPLOYMENT_VM_SIZE": "Standard_F2s_v2", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + } + ], + "kubernetes_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "ENDPOINT_NAME": "", + "ENDPOINT_DESC": "An kubernetes endpoint serving a flow for math coding", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": 100, + "COMPUTE_NAME": "", + "DEPLOYMENT_VM_SIZE": "promptinstancetype", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "CPU_ALLOCATION": "", + "MEMORY_ALLOCATION": "", + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + } + ], + "webapp_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "CONNECTION_NAMES": [""], + "REGISTRY_NAME": "", + "REGISTRY_RG_NAME": "", + "APP_PLAN_NAME": "", + "WEB_APP_NAME": "", + "WEB_APP_RG_NAME": "", + "WEB_APP_SKU": "B3", + "USER_MANAGED_ID": "" + + } + ] +} \ No newline at end of file diff --git a/use_case_research_assistant/data/eval_data.json b/use_case_research_assistant/data/eval_data.json new file mode 100644 index 000000000..2a97f2aa0 --- /dev/null +++ b/use_case_research_assistant/data/eval_data.json @@ -0,0 +1 @@ +{"question": "What's the population of Finland?", "answer": "The population of Finland is 5.5 million."} \ No newline at end of file diff --git a/use_case_research_assistant/data/sample_data.json b/use_case_research_assistant/data/sample_data.json new file mode 100644 index 000000000..2a97f2aa0 --- /dev/null +++ b/use_case_research_assistant/data/sample_data.json @@ -0,0 +1 @@ +{"question": "What's the population of Finland?", "answer": "The population of Finland is 5.5 million."} \ No newline at end of file diff --git a/use_case_research_assistant/environment/Dockerfile b/use_case_research_assistant/environment/Dockerfile new file mode 100644 index 000000000..625e9aead --- /dev/null +++ b/use_case_research_assistant/environment/Dockerfile @@ -0,0 +1,32 @@ +# syntax=docker/dockerfile:1 +FROM docker.io/continuumio/miniconda3:latest + +WORKDIR / + +COPY ./flow/requirements.txt /flow/requirements.txt + +RUN apt-get update && apt-get install -y runit gcc + +# create conda environment +RUN conda create -n promptflow-serve python=3.9.16 pip=23.0.1 -q -y && \ + conda run -n promptflow-serve \ + pip install -r /flow/requirements.txt && \ + conda run -n promptflow-serve pip install keyrings.alt && \ + conda run -n promptflow-serve pip install gunicorn==20.1.0 && \ + conda run -n promptflow-serve pip cache purge && \ + conda clean -a -y + +COPY ./flow /flow + +EXPOSE 8080 + +COPY ./connections/* /connections/ + +# reset runsvdir +RUN rm -rf /var/runit +COPY ./runit /var/runit +# grant permission +RUN chmod -R +x /var/runit + +COPY ./start.sh / +CMD ["bash", "./start.sh"] \ No newline at end of file diff --git a/use_case_research_assistant/experiment.yaml b/use_case_research_assistant/experiment.yaml new file mode 100644 index 000000000..bd1406b77 --- /dev/null +++ b/use_case_research_assistant/experiment.yaml @@ -0,0 +1,30 @@ +name: named_entity_recognition +flow: flows/standard + +connections: +- name: aoai + connection_type: AzureOpenAIConnection + api_base: https://edge-10x-ai-services.cognitiveservices.azure.com + api_version: 2023-07-01-preview + api_key: ${api_key} + api_type: azure + +datasets: +- name: sample_research_questions_training + source: data/sample_data.json + description: "This dataset is for sample research questions." + mappings: + question: "${data.question}" + answer: "${data.answer}" + +evaluators: +- name: evaluate_research_assistant + flow: flows/evaluation + datasets: + - name: sample_research_questions_test + reference: sample_research_questions_training + source: data/eval_data.jsonl + description: "This dataset is for evaluating flows." + mappings: + ground_truth: "${data.results}" + entities: "${run.outputs.entities}" diff --git a/use_case_research_assistant/flows/evaluation/flow.dag.yaml b/use_case_research_assistant/flows/evaluation/flow.dag.yaml new file mode 100644 index 000000000..a5f875128 --- /dev/null +++ b/use_case_research_assistant/flows/evaluation/flow.dag.yaml @@ -0,0 +1,25 @@ +$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json +inputs: + question: + type: string + default: "What's the population of Finland?" + answer: + type: string + default: "The population of Finland is 5.5 million." + ground_truth: + type: string + default: '"5.5 million"' +outputs: + output_answer: + type: object + reference: ${search_each_question.output} +nodes: +- name: search_question + type: python + source: + type: code + path: search_question.py + inputs: + question: ${inputs.question} +environment: + python_requirements_txt: requirements.txt \ No newline at end of file diff --git a/use_case_research_assistant/flows/evaluation/requirements.txt b/use_case_research_assistant/flows/evaluation/requirements.txt new file mode 100644 index 000000000..3cd95d693 --- /dev/null +++ b/use_case_research_assistant/flows/evaluation/requirements.txt @@ -0,0 +1,3 @@ +promptflow +promptflow-tools +promptflow-sdk[builtins] \ No newline at end of file diff --git a/use_case_research_assistant/flows/evaluation/search_question.py b/use_case_research_assistant/flows/evaluation/search_question.py new file mode 100644 index 000000000..e69de29bb diff --git a/use_case_research_assistant/flows/standard/QUESTION_EXPANDER.jinja2 b/use_case_research_assistant/flows/standard/QUESTION_EXPANDER.jinja2 new file mode 100644 index 000000000..59a0a4574 --- /dev/null +++ b/use_case_research_assistant/flows/standard/QUESTION_EXPANDER.jinja2 @@ -0,0 +1,6 @@ +system: +You are a research assistant. Your task is to rephrase the given question into a more specific question. + +user: +Question: {{question}} +Sub-questions: \ No newline at end of file diff --git a/use_case_research_assistant/flows/standard/flow.dag.yaml b/use_case_research_assistant/flows/standard/flow.dag.yaml new file mode 100644 index 000000000..e10d74c45 --- /dev/null +++ b/use_case_research_assistant/flows/standard/flow.dag.yaml @@ -0,0 +1,32 @@ +$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json +inputs: + question: + type: string + default: What's the population of Finland? +outputs: + answer: + type: string + reference: ${research_assistant.output} +nodes: +- name: QUESTION_EXPANDER + type: llm + source: + type: code + path: QUESTION_EXPANDER.jinja2 + inputs: + deployment_name: gpt-35-turbo + max_tokens: 64 + question: ${inputs.question} + provider: AzureOpenAI + connection: aoai + api: chat + module: promptflow.tools.aoai +- name: search_question + type: python + source: + type: code + path: search_question.py + inputs: + question: ${QUESTION_EXPANDER.output} +environment: + python_requirements_txt: requirements.txt diff --git a/use_case_research_assistant/flows/standard/requirements.txt b/use_case_research_assistant/flows/standard/requirements.txt new file mode 100644 index 000000000..3cd95d693 --- /dev/null +++ b/use_case_research_assistant/flows/standard/requirements.txt @@ -0,0 +1,3 @@ +promptflow +promptflow-tools +promptflow-sdk[builtins] \ No newline at end of file diff --git a/use_case_research_assistant/flows/standard/search_question.py b/use_case_research_assistant/flows/standard/search_question.py new file mode 100644 index 000000000..1c0488d53 --- /dev/null +++ b/use_case_research_assistant/flows/standard/search_question.py @@ -0,0 +1,9 @@ +from typing import List +from promptflow.core import tool + + +@tool +def search_question(question: str) -> List[str]: + # TODO: Implement the search logic + return "helLo this is an example answer for the question: " + question + diff --git a/use_case_research_assistant/sample-request.json b/use_case_research_assistant/sample-request.json new file mode 100755 index 000000000..12f80caff --- /dev/null +++ b/use_case_research_assistant/sample-request.json @@ -0,0 +1,3 @@ +{ + "question": "What is the square of 30?" +} \ No newline at end of file diff --git a/use_case_research_assistant/tests/test_questions.py b/use_case_research_assistant/tests/test_questions.py new file mode 100644 index 000000000..6eb6c1c6c --- /dev/null +++ b/use_case_research_assistant/tests/test_questions.py @@ -0,0 +1,6 @@ +def test_math_evaluation_flow(): + # Initialize the Promptflow client + print("test") + +if __name__ == "__main__": + test_math_evaluation_flow() \ No newline at end of file