diff --git a/docs/Endpoints/ChatCompletionAPI.md b/docs/Endpoints/ChatCompletionAPI.md new file mode 100644 index 0000000..6523261 --- /dev/null +++ b/docs/Endpoints/ChatCompletionAPI.md @@ -0,0 +1,61 @@ +--- +sidebar_position: 1 +id: chat-completion-api +title: Chat Completion API +tags: + - OpenAI API + - Chat Models +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Chat Completion API + +**The role and the content of the prompt for each role are required.** + + + + ```python showLineNumbers + model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model) + stream = True + + chat_response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": "You are a helpful assistant named Llama-3."}, + {"role": "user", "content": "What is Open Telekom Cloud?"}, + ], + temperature=0.1, + max_tokens=256, + stream=stream + ) + + if not stream: + print(chat_response.choices[0].message.content) + else: + for chunk in chat_response: + if chunk.choices: + if chunk.choices[0].delta.content is not None: + print(chunk.choices[0].delta.content, end="", flush=True) + ``` + + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Llama-3.1-70B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant named Llama-3."}, + {"role": "user", "content": "What is Open Telekom Cloud?"} + ], + "temperature": 0.1, + "max_tokens": 256, + "stream": true + }' + ``` + + diff --git a/docs/Endpoints/CompletionAPI.md b/docs/Endpoints/CompletionAPI.md new file mode 100644 index 0000000..d26b423 --- /dev/null +++ b/docs/Endpoints/CompletionAPI.md @@ -0,0 +1,56 @@ +--- +sidebar_position: 2 +id: completion-api +title: Completion API +tags: + - OpenAI API + - Text Models +--- + +import Tabs from '@theme/Tabs'; +import TabItem from 
'@theme/TabItem'; + +## Completion API + +**With this API, the raw text will be sent directly to the LLM without a special tag template.** + + + + ```python showLineNumbers + model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model) + stream = True + + completion = client.completions.create( + model=model, + prompt="What is Python programming language?", + stream=stream, + temperature=0.2, + max_tokens=128 + ) + + if not stream: + print(completion.choices[0].text) + + else: + for chunk in completion: + if chunk.choices: + if chunk.choices[0].text is not None: + print(chunk.choices[0].text, end="", flush=True) + ``` + + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Llama-3.1-70B-Instruct", + "prompt": "What is Python programming language?", + "temperature": 0.2, + "max_tokens": 128, + "stream": true + }' + ``` + + diff --git a/docs/Endpoints/EmbeddingAPI.md b/docs/Endpoints/EmbeddingAPI.md new file mode 100644 index 0000000..f30c0e3 --- /dev/null +++ b/docs/Endpoints/EmbeddingAPI.md @@ -0,0 +1,42 @@ +--- +sidebar_position: 3 +id: embedding-api +title: Embedding API +tags: + - OpenAI API + - Embedding Models +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Embedding API + + + + ```python showLineNumbers + model = "jina-embeddings-v2-base-de" + + texts = ["I am Batman and I'm rich", "I am Spiderman", "I am Ironman and I'm a billionaire", "I am Flash", "I am the president of USA"] + embeddings = client.embeddings.create( + input=texts, model=model + ) + + print('Embedding dimension: ', len(embeddings.data[0].embedding)) + print('Number of embedding vectors: ', len(embeddings.data)) + print('Token usage: ', embeddings.usage) + ``` + + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/embeddings \ + -H 
"Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-embeddings-v2-base-de", + "input": ["I am Batman and I'\''m rich", "I am Spiderman", "I am Ironman and I'\''m a billionaire", "I am Flash", "I am the president of USA"] + }' + ``` + + diff --git a/docs/Endpoints/_category_.json b/docs/Endpoints/_category_.json new file mode 100644 index 0000000..e7c32c0 --- /dev/null +++ b/docs/Endpoints/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Endpoints", + "position": 5, + "link": { + "type": "generated-index", + "description": "List of available Endpoints in AI Foundation Services" + } +} diff --git a/docs/Model Serving/API Reference.md b/docs/Model Serving/API Reference.md index bdefb53..d3c8e09 100644 --- a/docs/Model Serving/API Reference.md +++ b/docs/Model Serving/API Reference.md @@ -49,6 +49,13 @@ pip install openai print(model.id) ``` + + + ```bash showLineNumbers + curl -X GET https://llm-server.llmhub.t-systems.net/v2/models \ + -H "Authorization: Bearer YOUR_API_KEY" + ``` + :::info @@ -124,6 +131,24 @@ That will output: print(chunk.choices[0].delta.content, end="", flush=True) ``` + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Llama-3.1-70B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant named Llama-3."}, + {"role": "user", "content": "What is Open Telekom Cloud?"} + ], + "temperature": 0.1, + "max_tokens": 256, + "stream": true + }' + ``` + ### Completion API @@ -132,29 +157,42 @@ That will output: + ```python showLineNumbers + model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model) + stream = True -```python showLineNumbers -model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model) -stream = True - -completion = client.completions.create( - 
model=model, - prompt="What is Python programming language?", - stream=stream, - temperature=0.2, - max_tokens=128 -) - -if not stream: - print(completion.choices[0].text) - -else: - for chunk in completion: - if chunk.choices: - if chunk.choices[0].text is not None: - print(chunk.choices[0].text, end="", flush=True) -``` + completion = client.completions.create( + model=model, + prompt="What is Python programming language?", + stream=stream, + temperature=0.2, + max_tokens=128 + ) + + if not stream: + print(completion.choices[0].text) + else: + for chunk in completion: + if chunk.choices: + if chunk.choices[0].text is not None: + print(chunk.choices[0].text, end="", flush=True) + ``` + + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Llama-3.1-70B-Instruct", + "prompt": "What is Python programming language?", + "temperature": 0.2, + "max_tokens": 128, + "stream": true + }' + ``` @@ -181,6 +219,18 @@ The completions API is only available for open-source models. To get the correct print('Token usage: ',embeddings.usage) ``` + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/embeddings \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-embeddings-v2-base-de", + "input": ["I am Batman and I'\''m rich","I am Spiderman","I am Ironman and I'\''m a billionaire", "I am Flash", "I am the president of USA"] + }' + ``` + Example output: @@ -207,7 +257,9 @@ Usage(prompt_tokens=31, total_tokens=31) "properties": { "location": { "type": "string", - "description": + + "The city and state, e.g. 
San Francisco, CA", }, "format": { "type": "string", @@ -233,6 +285,45 @@ Usage(prompt_tokens=31, total_tokens=31) print(assistant_message) ``` + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4-turbo-128k-france", + "messages": [ + {"role": "system", "content": "Don'\''t make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."}, + {"role": "user", "content": "What'\''s the weather like today in Hamburg"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use." + } + }, + "required": ["location", "format"] + } + } + } + ] + }' + ``` + Example output: @@ -272,6 +363,13 @@ Here is an example of how to use OpenAI Vision API for Llava-1.6-34b. 
print(model.id) ``` + + + ```bash showLineNumbers + curl -X GET https://llm-server.llmhub.t-systems.net/vision/models \ + -H "Authorization: Bearer YOUR_API_KEY" + ``` + Example output: @@ -327,6 +425,32 @@ llava-v1.6-vicuna-13b print(chat_response.choices[0].message.content) ``` + + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/vision/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llava-v1.6-vicuna-13b", + "messages": [ + { + "role": "system", + "content": [{"type": "text", "text": "You are an helpful AI assistant named LLava help people answer their question base on the image and text provided."}] + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "What’s in this image?"}, + {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"}} + ] + } + ], + "max_tokens": 300, + "temperature": 0.01 + }' + ``` + Example output: @@ -400,7 +524,37 @@ The overall scene is peaceful and invites one to imagine a walk through the fiel if chunk.choices[0].delta.content is not None: print(chunk.choices[0].delta.content, end="") else: - print(chat_response.choices[0].message.content) + print(chat_response + +.choices[0].message.content) ``` - + + ```bash showLineNumbers + curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llava-v1.6-34b", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What’s in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "_BASE64_IMAGE_STRING" + } + } + ] + } + ], + "max_tokens": 1000, + "temperature": 0.01, + "stream": true + }' + ``` + + + \ No newline at end of file diff --git a/docs/Model Serving/Audio API.md 
b/docs/Model Serving/Audio API.md index 63e129a..2f83417 100644 --- a/docs/Model Serving/Audio API.md +++ b/docs/Model Serving/Audio API.md @@ -1,7 +1,7 @@ --- sidebar_position: 5 id: audio-api -title: Audio API Reference +title: Audio API tags: - Audio API - Transcription @@ -26,7 +26,7 @@ To interact with the audio API, set up your environment variables as follows: ```bash # Set API base URL -export API_BASE=https://llm-server.llmhub.t-systems.net +export API_BASE=https://llm-server.llmhub.t-systems.net/v2 # Set your API key export API_KEY=YOUR_LLMHUB_KEY diff --git a/package-lock.json b/package-lock.json index a1a0a81..ab81a4d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,7 @@ "name": "my-website", "version": "0.0.0", "dependencies": { - "@docusaurus/core": "3.2.1", + "@docusaurus/core": "^3.2.1", "@docusaurus/preset-classic": "3.2.1", "@easyops-cn/docusaurus-search-local": "^0.40.1", "@mdx-js/react": "^3.0.0", @@ -3127,6 +3127,7 @@ "version": "3.2.1", "resolved": "https://registry.npmjs.org/@docusaurus/core/-/core-3.2.1.tgz", "integrity": "sha512-ZeMAqNvy0eBv2dThEeMuNzzuu+4thqMQakhxsgT5s02A8LqRcdkg+rbcnuNqUIpekQ4GRx3+M5nj0ODJhBXo9w==", + "license": "MIT", "dependencies": { "@babel/core": "^7.23.3", "@babel/generator": "^7.23.3", @@ -6435,9 +6436,10 @@ } }, "node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "license": "MIT", "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", diff --git a/package.json b/package.json index e0a7a9f..e40f4bd 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ "write-heading-ids": 
"docusaurus write-heading-ids" }, "dependencies": { - "@docusaurus/core": "3.2.1", + "@docusaurus/core": "^3.2.1", "@docusaurus/preset-classic": "3.2.1", "@easyops-cn/docusaurus-search-local": "^0.40.1", "@mdx-js/react": "^3.0.0",