Added support for o1-preview and o1-mini models.
carusyte committed Nov 13, 2024
1 parent 43b436f commit e2d856f
Showing 4 changed files with 61 additions and 35 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.3.0] - 2024-11-13

- Added support for o1-preview and o1-mini models.

## [0.2.4] - 2024-05-27

- Added support for gpt-4o model and 2024-05-01-preview API version.
40 changes: 26 additions & 14 deletions package.json
@@ -3,7 +3,7 @@
"displayName": "GAI Choy",
"description": "G̲enerative A̲I̲ empowered, C̲ode H̲elper O̲n Y̲our side. Yet another Copilot for coding, with built-in integration capability with Azure OpenAI models or, local LLM such as CodeShell.",
"publisher": "carusyte",
"version": "0.2.4",
"version": "0.3.0",
"icon": "assets/logo.png",
"pricing": "Free",
"keywords": [
@@ -62,7 +62,9 @@
"gpt-35-turbo-16k",
"gpt-4",
"gpt-4-32k",
"gpt-4o"
"gpt-4o",
"o1-mini",
"o1-preview"
],
"type": "string",
"order": 4
@@ -75,17 +77,25 @@
"gpt-35-turbo-16k",
"gpt-4",
"gpt-4-32k",
"gpt-4o"
"gpt-4o",
"o1-mini",
"o1-preview"
],
"type": "string",
"order": 5
},
"GAIChoy.JsonMode": {
"description": "Whether to enforce JSON mode in response. Some models such as o1 may not support this mode.",
"default": false,
"type": "boolean",
"order": 6
},
"GAIChoy.ApiKey": {
"description": "Set the API key for Azure OpenAI service",
"type": "null",
"scope": "application",
"markdownDescription": "[Set API Key](command:gaichoy.set_api_key)",
"order": 6
"order": 7
},
"GAIChoy.ApiVersion": {
"description": "The API version for Azure OpenAI.",
@@ -98,28 +108,30 @@
"2023-09-01-preview",
"2023-12-01-preview",
"2024-02-01",
"2024-05-01-preview"
"2024-05-01-preview",
"2024-08-01-preview",
"2024-10-21"
],
"type": "string",
"order": 7
"order": 8
},
"GAIChoy.ApiParameters": {
"description": "The API parameters for Azure OpenAI. Format: key=value pairs delimited by semicolons.",
"default": "temperature=0.2",
"type": "string",
"order": 8
"order": 9
},
"GAIChoy.ApiTimeout": {
"description": "The timeout in seconds before API call stops waiting or retrying.",
"type": "number",
"default": 60,
"order": 9
"order": 10
},
"GAIChoy.AutoTriggerCompletion": {
"description": "Whether or not to automatically trigger completion when typing.",
"default": false,
"type": "boolean",
"order": 10
"order": 11
},
"GAIChoy.AutoCompletionDelay": {
"description": "The delay in seconds before automatic code completion triggers.",
Expand All @@ -130,7 +142,7 @@
3
],
"default": 2,
"order": 11
"order": 12
},
"GAIChoy.CompletionMaxTokens": {
"description": "Maximum number of tokens for which suggestions will be displayed",
Expand All @@ -145,7 +157,7 @@
4096
],
"default": 64,
"order": 12
"order": 13
},
"GAIChoy.ChatMaxTokens": {
"description": "Maximum number of tokens for which chat messages will be displayed",
Expand All @@ -159,20 +171,20 @@
32768
],
"default": 2048,
"order": 13
"order": 14
},
"GAIChoy.EnableDebugMessage": {
"description": "Prints debug message to extension output.",
"type": "boolean",
"default": false,
"order": 14
"order": 15
},
"GAIChoy.ClearChatHistory": {
"description": "Clear the chat history",
"type": "null",
"scope": "application",
"markdownDescription": "[Clear chat history](command:gaichoy.clear_chat_history)",
"order": 15
"order": 16
}
}
},
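The two model pickers above gain o1-mini and o1-preview entries, two newer API versions (2024-08-01-preview and 2024-10-21) are added, and a new GAIChoy.JsonMode boolean is contributed, with the later settings renumbered to make room for it. As a minimal sketch, assuming a helper named readGaiChoySettings that is not part of this commit, these contribution points surface at runtime through the VS Code configuration API, using only the setting keys shown in the diff:

```typescript
import { workspace } from "vscode";

// Illustrative helper (not part of this commit): reads the settings touched by
// this change from the "GAIChoy" configuration section contributed in package.json.
function readGaiChoySettings() {
    const config = workspace.getConfiguration("GAIChoy");
    return {
        apiVersion: config.get<string>("ApiVersion"),           // e.g. "2024-10-21"
        apiParameters: config.get<string>("ApiParameters", "temperature=0.2"),
        apiTimeout: config.get<number>("ApiTimeout", 60),
        jsonMode: config.get<boolean>("JsonMode", false),        // new toggle in this commit
    };
}
```

AzureOAI.ts below reads the same keys individually; the JsonMode value is what gates the response_format field further down.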
41 changes: 23 additions & 18 deletions src/RequestEventStream.ts
@@ -30,7 +30,7 @@ export async function postEventStream(prompt: string, chatList: Array<ChatItem>,
}

var uri = "";
var body = {};
let body: {[key: string]: any} = {};
var headers = {};

if ("CPU with llama.cpp" == modelEnv) {
@@ -66,24 +66,25 @@
"api-key": api_key
};
body = {
"temperature": 0.8,
"messages": [
temperature: 0.8,
messages: [
{
"role": "system",
"content": `
Your role is an AI pair programming assistant and technical consultant, a programming expert with strong coding skills.
Your task is to answer questions raised by the user as a developer.
You can solve all kinds of programming problems.
You can design projects, code structures, and code files step by step with one click.
- Follow the user's requirements carefully and to the letter. If there's uncertainty, you can try rephrasing it, then extend the rephrased question, before responding.
- Answer in user's natural language.
- Don't use excessive line breaks between paragraphs.
- First think step-by-step, describe your plan for what to build in pseudocode, written out in great detail
- Then output the code in a single code block, and specify the language type for the code block.
- Minimize any other prose. Be concise.
- Wait for the users' instruction, be interactive to understand more about user's problem, such that you can provide effective answer.
- If the response extends beyond token limit, respond in multiple responses/messages so your responses aren't cutoff. Tell the user to print next or continue.
`
role: model === "o1-preview" ? "user" : "system",
// "content": `
// Your role is an AI pair programming assistant and technical consultant, a programming expert with strong coding skills.
// Your task is to answer questions raised by the user as a developer.
// You can solve all kinds of programming problems.
// You can design projects, code structures, and code files step by step with one click.
// - Follow the user's requirements carefully and to the letter. If there's uncertainty, you can try rephrasing it, then extend the rephrased question, before responding.
// - Answer in user's natural language.
// - Don't use excessive line breaks between paragraphs.
// - First think step-by-step, describe your plan for what to build in pseudocode, written out in great detail
// - Then output the code in a single code block, and specify the language type for the code block.
// - Minimize any other prose. Be concise.
// - Wait for the users' instruction, be interactive to understand more about user's problem, such that you can provide effective answer.
// - If the response extends beyond token limit, respond in multiple responses/messages so your responses aren't cutoff. Tell the user to print next or continue.
// `
content: `You are an AI assistant named "GAI Choy" that helps people find information and satisfy their requests.`
}
]
// "stream": true,
Expand All @@ -94,6 +95,10 @@ export async function postEventStream(prompt: string, chatList: Array<ChatItem>,

AzureOAI.mergeParameters(body, parameters)

if (model === "o1-preview") {
delete body.temperature // this parameter is not supported by o1 model
}

for (let item of chatList) {
if (item.humanMessage.content.length > 0) {
// @ts-ignore
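The changes above make two o1-specific adjustments inline: the long system prompt is commented out in favour of a short one, its message is sent with the "user" role when the deployment is "o1-preview" (which does not accept "system" messages), and the temperature parameter is deleted after AzureOAI.mergeParameters runs, since o1 deployments reject it. The sketch below factors the same adjustments into one helper; the helper name is illustrative, and treating "o1-mini" the same way as "o1-preview" is an assumption beyond what the commit does.

```typescript
// Illustrative refactoring (not in the repository) of the inline o1 handling above.
// Assumption: "o1-mini" needs the same treatment as "o1-preview"; the commit itself
// only special-cases "o1-preview".
type RequestBody = { [key: string]: any };

function adjustBodyForO1(body: RequestBody, model: string): RequestBody {
    if (model !== "o1-preview" && model !== "o1-mini") {
        return body;
    }
    // o1 deployments reject the temperature parameter, so drop it after user
    // parameters have been merged in.
    delete body.temperature;
    // o1-preview does not accept the "system" role, so downgrade it to "user".
    if (Array.isArray(body.messages)) {
        body.messages = body.messages.map((m: { role: string; content: string }) =>
            m.role === "system" ? { ...m, role: "user" } : m
        );
    }
    return body;
}
```

Doing the removal after mergeParameters matters either way: a temperature=... entry in GAIChoy.ApiParameters would otherwise be merged back into the body.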
11 changes: 8 additions & 3 deletions src/llm/AzureOAI.ts
@@ -32,6 +32,7 @@ export class AzureOAI {
const api_version = workspace.getConfiguration("GAIChoy").get("ApiVersion") as string;
const parameters = workspace.getConfiguration("GAIChoy").get("ApiParameters") as string;
const timeout = workspace.getConfiguration("GAIChoy").get("ApiTimeout") as number;
const jsonMode = workspace.getConfiguration("GAIChoy").get("JsonMode") as boolean;

// get API key from secret storage
let api_key = await ExtensionResource.instance.getApiKey();
@@ -49,8 +50,8 @@
"temperature": 0.2,
"messages": [
{
"role": "system",
"content": `Your role is an AI code generator.
"role": model === "o1-preview" ? "user": "system",
"content": `Your role is an AI code generator. Your name is "GAI Choy".
Your task is to provide executable and functional code fragments AS-IS, based on the context provided by the user.
The context and metadata of the code fragment will be provided by user in the following format, as surrounded by triple-backticks.
Actual input from user will exclude the beginning and trailing triple-backticks:
@@ -115,10 +116,14 @@ Expected response in JSON format:
this.mergeParameters(data, parameters)

// Conditionally add "response_format": {"type": "json_object"} to the data variable when JSON mode is enabled in the settings.
data = api_version >= '2023-12-01-preview' ? {
data = jsonMode ? {
...data, response_format: { type: 'json_object' }
} : data

if (model === "o1-preview") {
delete data.temperature // this parameter is not supported by o1 model
}

ExtensionResource.instance.debugMessage("request.data: \n" + JSON.stringify(data))
const uri = "/openai/deployments/" + model + "/chat/completions?api-version=" + api_version

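In AzureOAI.ts, response_format: {"type": "json_object"} was previously attached whenever the configured api_version string compared at least '2023-12-01-preview'; it is now gated on the new GAIChoy.JsonMode setting, and, as in RequestEventStream.ts, temperature is stripped for o1-preview. A minimal sketch of that request preparation, assuming an illustrative buildChatCompletionsRequest helper (only the URI pattern, the setting, and the o1 check are taken from the diff):

```typescript
// Illustrative sketch (not a function in the repository) of how the gated
// response_format, the o1 temperature removal, and the deployment URI fit together.
function buildChatCompletionsRequest(
    model: string,
    apiVersion: string,
    jsonMode: boolean,
    data: { [key: string]: any }
): { uri: string; data: { [key: string]: any } } {
    const request: { [key: string]: any } = { ...data };
    // JSON mode is now an explicit user setting rather than an api_version comparison.
    if (jsonMode) {
        request.response_format = { type: "json_object" };
    }
    // o1-preview rejects the temperature parameter.
    if (model === "o1-preview") {
        delete request.temperature;
    }
    const uri = "/openai/deployments/" + model + "/chat/completions?api-version=" + apiVersion;
    return { uri, data: request };
}
```

Making JSON mode an explicit setting rather than an api_version inference also fits the new o1 entries, which, as the setting's description notes, may not support the json_object response format.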
