diff --git a/docs/Endpoints/ChatCompletionAPI.md b/docs/Endpoints/ChatCompletionAPI.md
new file mode 100644
index 0000000..6523261
--- /dev/null
+++ b/docs/Endpoints/ChatCompletionAPI.md
@@ -0,0 +1,61 @@
+---
+sidebar_position: 1
+id: chat-completion-api
+title: Chat Completion API
+tags:
+ - OpenAI API
+ - Chat Models
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Chat Completion API
+
+**The role and the content of the prompt for each role are required.**
+
+
+
+ ```python showLineNumbers
+ model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model)
+ stream = True
+
+ chat_response = client.chat.completions.create(
+ model=model,
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant named Llama-3."},
+ {"role": "user", "content": "What is Open Telekom Cloud?"},
+ ],
+ temperature=0.1,
+ max_tokens=256,
+ stream=stream
+ )
+
+ if not stream:
+ print(chat_response.choices[0].message.content)
+ else:
+ for chunk in chat_response:
+ if chunk.choices:
+ if chunk.choices[0].delta.content is not None:
+ print(chunk.choices[0].delta.content, end="", flush=True)
+ ```
+
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Llama-3.1-70B-Instruct",
+ "messages": [
+ {"role": "system", "content": "You are a helpful assistant named Llama-3."},
+ {"role": "user", "content": "What is Open Telekom Cloud?"}
+ ],
+ "temperature": 0.1,
+ "max_tokens": 256,
+ "stream": true
+ }'
+ ```
+
+
diff --git a/docs/Endpoints/CompletionAPI.md b/docs/Endpoints/CompletionAPI.md
new file mode 100644
index 0000000..d26b423
--- /dev/null
+++ b/docs/Endpoints/CompletionAPI.md
@@ -0,0 +1,56 @@
+---
+sidebar_position: 2
+id: completion-api
+title: Completion API
+tags:
+ - OpenAI API
+ - Text Models
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Completion API
+
+**With this API, the raw text will be sent directly to the LLM without a special tag template.**
+
+
+
+ ```python showLineNumbers
+ model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model)
+ stream = True
+
+ completion = client.completions.create(
+ model=model,
+ prompt="What is Python programming language?",
+ stream=stream,
+ temperature=0.2,
+ max_tokens=128
+ )
+
+ if not stream:
+ print(completion.choices[0].text)
+
+ else:
+ for chunk in completion:
+ if chunk.choices:
+ if chunk.choices[0].text is not None:
+ print(chunk.choices[0].text, end="", flush=True)
+ ```
+
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/completions \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Llama-3.1-70B-Instruct",
+ "prompt": "What is Python programming language?",
+ "temperature": 0.2,
+ "max_tokens": 128,
+ "stream": true
+ }'
+ ```
+
+
diff --git a/docs/Endpoints/EmbeddingAPI.md b/docs/Endpoints/EmbeddingAPI.md
new file mode 100644
index 0000000..f30c0e3
--- /dev/null
+++ b/docs/Endpoints/EmbeddingAPI.md
@@ -0,0 +1,42 @@
+---
+sidebar_position: 3
+id: embedding-api
+title: Embedding API
+tags:
+ - OpenAI API
+ - Embedding Models
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Embedding API
+
+
+
+ ```python showLineNumbers
+ model = "jina-embeddings-v2-base-de"
+
+ texts = ["I am Batman and I'm rich", "I am Spiderman", "I am Ironman and I'm a billionaire", "I am Flash", "I am the president of USA"]
+ embeddings = client.embeddings.create(
+ input=texts, model=model
+ )
+
+ print('Embedding dimension: ', len(embeddings.data[0].embedding))
+ print('Number of embedding vectors: ', len(embeddings.data))
+ print('Token usage: ', embeddings.usage)
+ ```
+
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/embeddings \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jina-embeddings-v2-base-de",
+ "input": ["I am Batman and I'm rich", "I am Spiderman", "I am Ironman and I'm a billionaire", "I am Flash", "I am the president of USA"]
+ }'
+ ```
+
+
diff --git a/docs/Endpoints/_category_.json b/docs/Endpoints/_category_.json
new file mode 100644
index 0000000..e7c32c0
--- /dev/null
+++ b/docs/Endpoints/_category_.json
@@ -0,0 +1,8 @@
+{
+ "label": "Endpoints",
+ "position": 5,
+ "link": {
+ "type": "generated-index",
+ "description": "List of available Endpoints in AI Foundation Services"
+ }
+}
diff --git a/docs/Model Serving/API Reference.md b/docs/Model Serving/API Reference.md
index bdefb53..d3c8e09 100644
--- a/docs/Model Serving/API Reference.md
+++ b/docs/Model Serving/API Reference.md
@@ -49,6 +49,13 @@ pip install openai
print(model.id)
```
+
+
+ ```bash showLineNumbers
+ curl -X GET https://llm-server.llmhub.t-systems.net/v2/models \
+ -H "Authorization: Bearer YOUR_API_KEY"
+ ```
+
:::info
@@ -124,6 +131,24 @@ That will output:
print(chunk.choices[0].delta.content, end="", flush=True)
```
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Llama-3.1-70B-Instruct",
+ "messages": [
+ {"role": "system", "content": "You are a helpful assistant named Llama-3."},
+ {"role": "user", "content": "What is Open Telekom Cloud?"}
+ ],
+ "temperature": 0.1,
+ "max_tokens": 256,
+ "stream": true
+ }'
+ ```
+
### Completion API
@@ -132,29 +157,42 @@ That will output:
+ ```python showLineNumbers
+ model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model)
+ stream = True
-```python showLineNumbers
-model = "Llama-3.1-70B-Instruct" # choose one of the available LLMs (not the embedding model)
-stream = True
-
-completion = client.completions.create(
- model=model,
- prompt="What is Python programming language?",
- stream=stream,
- temperature=0.2,
- max_tokens=128
-)
-
-if not stream:
- print(completion.choices[0].text)
-
-else:
- for chunk in completion:
- if chunk.choices:
- if chunk.choices[0].text is not None:
- print(chunk.choices[0].text, end="", flush=True)
-```
+ completion = client.completions.create(
+ model=model,
+ prompt="What is Python programming language?",
+ stream=stream,
+ temperature=0.2,
+ max_tokens=128
+ )
+
+ if not stream:
+ print(completion.choices[0].text)
+ else:
+ for chunk in completion:
+ if chunk.choices:
+ if chunk.choices[0].text is not None:
+ print(chunk.choices[0].text, end="", flush=True)
+ ```
+
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/completions \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Llama-3.1-70B-Instruct",
+ "prompt": "What is Python programming language?",
+ "temperature": 0.2,
+ "max_tokens": 128,
+ "stream": true
+ }'
+ ```
@@ -181,6 +219,18 @@ The completions API is only available for open-source models. To get the correct
print('Token usage: ',embeddings.usage)
```
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/embeddings \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jina-embeddings-v2-base-de",
+ "input": ["I am Batman and I'm rich","I am Spiderman","I am Ironman and I'm a billionaire", "I am Flash", "I am the president of USA"]
+ }'
+ ```
+
Example output:
@@ -207,7 +257,9 @@ Usage(prompt_tokens=31, total_tokens=31)
"properties": {
"location": {
"type": "string",
- "description": "The city and state, e.g. San Francisco, CA",
+ "description":
+
+ "The city and state, e.g. San Francisco, CA",
},
"format": {
"type": "string",
@@ -233,6 +285,45 @@ Usage(prompt_tokens=31, total_tokens=31)
print(assistant_message)
```
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4-turbo-128k-france",
+ "messages": [
+ {"role": "system", "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."},
+ {"role": "user", "content": "What's the weather like today in Hamburg"}
+ ],
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_current_weather",
+ "description": "Get the current weather",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "format": {
+ "type": "string",
+ "enum": ["celsius", "fahrenheit"],
+ "description": "The temperature unit to use."
+ }
+ },
+ "required": ["location", "format"]
+ }
+ }
+ }
+ ]
+ }'
+ ```
+
Example output:
@@ -272,6 +363,13 @@ Here is an example of how to use OpenAI Vision API for Llava-1.6-34b.
print(model.id)
```
+
+
+ ```bash showLineNumbers
+ curl -X GET https://llm-server.llmhub.t-systems.net/vision/models \
+ -H "Authorization: Bearer YOUR_API_KEY"
+ ```
+
Example output:
@@ -327,6 +425,32 @@ llava-v1.6-vicuna-13b
print(chat_response.choices[0].message.content)
```
+
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/vision/completions \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "llava-v1.6-vicuna-13b",
+ "messages": [
+ {
+ "role": "system",
+ "content": [{"type": "text", "text": "You are an helpful AI assistant named LLava help people answer their question base on the image and text provided."}]
+ },
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What’s in this image?"},
+ {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"}}
+ ]
+ }
+ ],
+ "max_tokens": 300,
+ "temperature": 0.01
+ }'
+ ```
+
Example output:
@@ -400,7 +524,37 @@ The overall scene is peaceful and invites one to imagine a walk through the fiel
if chunk.choices[0].delta.content is not None:
print(chunk.choices[0].delta.content, end="")
else:
- print(chat_response.choices[0].message.content)
+ print(chat_response
+
+.choices[0].message.content)
```
-
+
+ ```bash showLineNumbers
+ curl -X POST https://llm-server.llmhub.t-systems.net/v2/chat/completions \
+ -H "Authorization: Bearer YOUR_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "llava-v1.6-34b",
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What’s in this image?"},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "_BASE64_IMAGE_STRING"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 1000,
+ "temperature": 0.01,
+ "stream": true
+ }'
+ ```
+
+
+
\ No newline at end of file
diff --git a/docs/Model Serving/Audio API.md b/docs/Model Serving/Audio API.md
index 63e129a..2f83417 100644
--- a/docs/Model Serving/Audio API.md
+++ b/docs/Model Serving/Audio API.md
@@ -1,7 +1,7 @@
---
sidebar_position: 5
id: audio-api
-title: Audio API Reference
+title: Audio API
tags:
- Audio API
- Transcription
@@ -26,7 +26,7 @@ To interact with the audio API, set up your environment variables as follows:
```bash
# Set API base URL
-export API_BASE=https://llm-server.llmhub.t-systems.net
+export API_BASE=https://llm-server.llmhub.t-systems.net/v2
# Set your API key
export API_KEY=YOUR_LLMHUB_KEY
diff --git a/package-lock.json b/package-lock.json
index a1a0a81..ab81a4d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,7 +8,7 @@
"name": "my-website",
"version": "0.0.0",
"dependencies": {
- "@docusaurus/core": "3.2.1",
+ "@docusaurus/core": "^3.2.1",
"@docusaurus/preset-classic": "3.2.1",
"@easyops-cn/docusaurus-search-local": "^0.40.1",
"@mdx-js/react": "^3.0.0",
@@ -3127,6 +3127,7 @@
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/@docusaurus/core/-/core-3.2.1.tgz",
"integrity": "sha512-ZeMAqNvy0eBv2dThEeMuNzzuu+4thqMQakhxsgT5s02A8LqRcdkg+rbcnuNqUIpekQ4GRx3+M5nj0ODJhBXo9w==",
+ "license": "MIT",
"dependencies": {
"@babel/core": "^7.23.3",
"@babel/generator": "^7.23.3",
@@ -6435,9 +6436,10 @@
}
},
"node_modules/cross-spawn": {
- "version": "7.0.3",
- "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
- "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+ "version": "7.0.6",
+ "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+ "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
+ "license": "MIT",
"dependencies": {
"path-key": "^3.1.0",
"shebang-command": "^2.0.0",
diff --git a/package.json b/package.json
index e0a7a9f..e40f4bd 100644
--- a/package.json
+++ b/package.json
@@ -14,7 +14,7 @@
"write-heading-ids": "docusaurus write-heading-ids"
},
"dependencies": {
- "@docusaurus/core": "3.2.1",
+ "@docusaurus/core": "^3.2.1",
"@docusaurus/preset-classic": "3.2.1",
"@easyops-cn/docusaurus-search-local": "^0.40.1",
"@mdx-js/react": "^3.0.0",