diff --git a/src/api/agents/product/product.prompty b/src/api/agents/product/product.prompty index 2a989866..dd3d7ba2 100644 --- a/src/api/agents/product/product.prompty +++ b/src/api/agents/product/product.prompty @@ -7,12 +7,18 @@ model: api: chat configuration: type: azure_openai - azure_deployment: gpt-4-evals - api_version: 2023-07-01-preview + azure_deployment: gpt-4o-mini + api_version: 2024-06-01 parameters: max_tokens: 1500 + response_format: + type: json_object sample: context: Can you use a selection of sports and outdoor cooking gear as context? +inputs: + context: + type: string + --- system: @@ -48,4 +54,5 @@ Only output the full array of specialized queries to make to the search index. L yoursef to 5 queries. user: -{{context}} +context: {{context}} +queries: diff --git a/src/api/agents/product/product.py b/src/api/agents/product/product.py index a4ad1788..f3adf408 100644 --- a/src/api/agents/product/product.py +++ b/src/api/agents/product/product.py @@ -87,7 +87,11 @@ def retrieve_products(items, index_name): def find_products(context: str) -> Dict[str, any]: # Get product queries queries = prompty.execute("product.prompty", inputs={"context":context}) + qs = json.loads(queries) + if "queries" in qs.keys(): + qs = qs["queries"] + print("Agent suggested 5 product queries:", qs) # Generate embeddings items = generate_embeddings(qs) # Retrieve products diff --git a/src/api/agents/researcher/researcher.prompty b/src/api/agents/researcher/researcher.prompty index d21e50a8..93faac81 100644 --- a/src/api/agents/researcher/researcher.prompty +++ b/src/api/agents/researcher/researcher.prompty @@ -9,13 +9,18 @@ model: api: chat configuration: type: azure_openai - azure_deployment: gpt-35-turbo - api_version: 2023-07-01-preview + azure_deployment: gpt-4o-mini + api_version: 2024-06-01 parameters: tools: ${file:functions.json} sample: instructions: Can you find the latest camping trends and what folks are doing in the winter? feedback: Can you dig find some information about the latest camping trends and what folks are doing in the winter? +inputs: + instructions: + type: string + feedback: + type: string --- system: # Researcher Agent diff --git a/src/api/agents/researcher/researcher.py b/src/api/agents/researcher/researcher.py index bc6ad270..5feb10cf 100644 --- a/src/api/agents/researcher/researcher.py +++ b/src/api/agents/researcher/researcher.py @@ -41,7 +41,10 @@ def find_information(query, market="en-US"): {"url": a["url"], "name": a["name"], "description": a["snippet"]} for a in items["webPages"]["value"] ] - related = [a["text"] for a in items["relatedSearches"]["value"]] + # set defaults + related = [""] * len(items) + if "relatedSearches" in items: + related = [a["text"] for a in items["relatedSearches"]["value"]] return {"pages": pages, "related": related} diff --git a/src/api/agents/writer/writer.prompty b/src/api/agents/writer/writer.prompty index 71cbb446..690c6915 100644 --- a/src/api/agents/writer/writer.prompty +++ b/src/api/agents/writer/writer.prompty @@ -7,8 +7,8 @@ model: api: chat configuration: type: azure_openai - azure_deployment: gpt-4-evals - api_version: 2023-07-01-preview + azure_deployment: gpt-4o-mini + api_version: 2024-06-01 parameters: max_tokens: 2000 sample: diff --git a/src/api/evaluate/eval_inputs.jsonl b/src/api/evaluate/eval_inputs.jsonl index 6b2141ce..b827323d 100644 --- a/src/api/evaluate/eval_inputs.jsonl +++ b/src/api/evaluate/eval_inputs.jsonl @@ -1,3 +1,11 @@ -{"research_context": "Can you find the latest camping trends and what folks are doing in the winter?", "product_context": "Can you use a selection of tents and sleeping bags as context?", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be roughly 500 words long."} -{"research_context": "Can you find the latest trends in hiking shoes?" , "product_context":"Can you use a selection of hiking shoes as context?", "assignment_context": "Write an article about the best kind of hiking shoes. The article should include the product information. The article should be roughly 500 words long."} -{"research_context": "Find information about the best snow camping spots in the world","product_context":"Can you use a selection of tents that are good for snow as context?", "assignment_context": "Write an article about the best kind of tents for snow camping. The article should be roughly 500 words long."} \ No newline at end of file +{"research_context": "Can you find the latest camping trends and what folks are doing in the winter?", "product_context": "Can you use a selection of tents and sleeping bags as context?", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "Can you investigate the rise of glamping and its impact on traditional camping?", "product_context": "Include examples of luxury tents and high-end sleeping bags in your discussion.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "What are the latest trends in family camping and how are families choosing their gear?", "product_context": "Use a variety of family-sized tents and sleeping bags as context.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "Can you analyze the trend of digital detox camping and how it influences gear choices?", "product_context": "Include examples of tents and sleeping bags that promote a tech-free experience.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "Can you delve into the trend of adventure camping and how it influences gear choices for extreme conditions?", "product_context": "Use a selection of rugged tents and specialized sleeping bags as context.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "What are the current trends in pet-friendly camping and how are campers choosing gear that accommodates their furry friends?", "product_context": "Include examples of pet-friendly tents and sleeping bags.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "What are the emerging trends in camping technology and how are they changing the way campers choose their gear?", "product_context": "Include examples of tech-integrated tents and sleeping bags.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "Can you explore the trend of eco-conscious hiking and how it influences gear choices?", "product_context": "Use a selection of sustainable hiking apparel and gear as context.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "What are the current trends in solo hiking and how are individuals choosing their gear for safety and comfort?", "product_context": "Include examples of solo-friendly tents and compact cooking gear.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "Can you analyze the rise of family hiking and how families are selecting gear that accommodates all ages?", "product_context": "Use a variety of family-sized backpacks and kid-friendly hiking gear as examples.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} +{"research_context": "What are the emerging trends in hiking technology and how are they changing the way hikers choose their gear?", "product_context": "Include examples of GPS devices and smart hiking apparel.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."} \ No newline at end of file diff --git a/src/api/evaluate/evaluate.py b/src/api/evaluate/evaluate.py index 690e6e34..d178101c 100644 --- a/src/api/evaluate/evaluate.py +++ b/src/api/evaluate/evaluate.py @@ -43,7 +43,7 @@ def evaluate_remote(data_path): model_config = default_connection.to_evaluator_model_config(deployment_name=deployment_name, api_version=api_version) # Create an evaluation evaluation = Evaluation( - display_name="Remote Evaluation", + display_name="Cloud Evaluation", description="Evaluation of dataset", data=Dataset(id=data_id), evaluators={ @@ -141,15 +141,25 @@ def evaluate_orchestrator(model_config, project_scope, data_path): data = [] eval_data = [] + import time + start = time.time() print(f"\n===== Creating articles to evaluate using data provided in {data_path}") print("") + num_retries = 3 with open(data_path) as f: for num, line in enumerate(f): row = json.loads(line) data.append(row) print(f"generating article {num +1}") - eval_data.append(run_orchestrator(row["research_context"], row["product_context"], row["assignment_context"])) - + for i in range(num_retries): + try: + eval_data.append(run_orchestrator(row["research_context"], row["product_context"], row["assignment_context"])) + break + except Exception as e: + print("Agents failed to produce an article. Examine trace for details. Error message:" + str(e) + f"\Retrying {i+1}/{num_retries} times.") + continue + end = time.time() + print(f"Agent finished writing articles in {end-start} seconds.") # write out eval data to a file so we can re-run evaluation on it with jsonlines.open(folder + '/eval_data.jsonl', 'w') as writer: for row in eval_data: @@ -299,6 +309,7 @@ def make_image_message(url_path): resized_image_urls = [] for image in image_path: new_image = local_image_resize(image) + if new_image is None: continue #get the file type _, extension = os.path.splitext(new_image) # Normalize the extension (e.g., .JPG -> jpg) @@ -444,7 +455,7 @@ def make_image_message(url_path): img_paths = [] # This is code to add an image from a file path - for image_num in range(1,4): + for image_num in range(1, 9): parent = pathlib.Path(__file__).parent.resolve() path = os.path.join(parent, "data") image_path = os.path.join(path, f"{image_num}.png") diff --git a/src/api/evaluate/evaluators.py b/src/api/evaluate/evaluators.py index f2e77784..4bd14dc6 100644 --- a/src/api/evaluate/evaluators.py +++ b/src/api/evaluate/evaluators.py @@ -4,7 +4,7 @@ import prompty from opentelemetry import trace from opentelemetry.trace import set_span_in_context -from azure.ai.evaluation import RelevanceEvaluator, GroundednessEvaluator, FluencyEvaluator, CoherenceEvaluator +from azure.ai.evaluation import RelevanceEvaluator, GroundednessEvaluator, FluencyEvaluator, CoherenceEvaluator, RetrievalEvaluator from azure.ai.evaluation import ViolenceEvaluator, HateUnfairnessEvaluator, SelfHarmEvaluator, SexualEvaluator from azure.ai.evaluation import evaluate from azure.ai.evaluation import ViolenceMultimodalEvaluator, SelfHarmMultimodalEvaluator, HateUnfairnessMultimodalEvaluator, SexualMultimodalEvaluator @@ -56,14 +56,19 @@ def __call__(self, response): class ArticleEvaluator: def __init__(self, model_config, project_scope): self.evaluators = { + # RAG metrics + "groundedness": GroundednessEvaluator(model_config), + "retrieval": RetrievalEvaluator(model_config), "relevance": RelevanceEvaluator(model_config), + # business writing metrics "fluency": FluencyEvaluator(model_config), "coherence": CoherenceEvaluator(model_config), - "groundedness": GroundednessEvaluator(model_config), + # safety metrics "violence": ViolenceEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()), "hate_unfairness": HateUnfairnessEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()), "self_harm": SelfHarmEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()), "sexual": SexualEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()), + # custom evaluator for friendly tones "friendliness": FriendlinessEvaluator(), } self.project_scope = project_scope @@ -73,6 +78,7 @@ def __call__(self, *, data_path, **kwargs): ## NOTE: - The following code expects that the user has Storage Blob Data Contributor permissions in order for the results to upload to the Azure AI Studio. result = evaluate( data=data_path, + evaluation_name="Local Evaluation", evaluators=self.evaluators, ## NOTE: If you do not have Storage Blob Data Contributor permissions, please comment out the below line of code. azure_ai_project=self.project_scope,