From a5abe1056aeb971e08f241848ed924450af29425 Mon Sep 17 00:00:00 2001 From: kabachuha Date: Wed, 6 Dec 2023 14:58:56 +0300 Subject: [PATCH 1/7] multimodal api image url parsing support --- extensions/openai/completions.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 389466ff25..ec8bc69f2f 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -17,7 +17,10 @@ ) from modules.presets import load_preset_memoized from modules.text_generation import decode, encode, generate_reply - +from PIL import Image +from io import BytesIO +import base64 +import requests class LogitsBiasProcessor(LogitsProcessor): def __init__(self, logit_bias={}): @@ -139,7 +142,23 @@ def convert_history(history): system_message = "" for entry in history: - content = entry["content"] + if "image_url" in entry: + image_url = entry['image_url'] + if "base64" in image_url: + image_url = image_url.split('base64')[1] + img = Image.open(BytesIO(base64.b64decode(image_url))) + else: + try: + my_res = requests.get(image_url) + img = Image.open(BytesIO(my_res.content)) + except Exception as e: + raise 'Image cannot be loaded from the URL!' 
+ buffered = BytesIO() + img.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()).decode('utf-8') + content = f'' + else: + content = entry["content"] role = entry["role"] if role == "user": @@ -181,7 +200,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) - raise InvalidRequestError(message="messages: missing role", param='messages') elif m['role'] == 'function': raise InvalidRequestError(message="role: function is not supported.", param='messages') - if 'content' not in m: + if 'content' not in m and "image_url" not in m: raise InvalidRequestError(message="messages: missing content", param='messages') # Chat Completions From 614232ad2306896207f20ff4e70aaf6f7853555c Mon Sep 17 00:00:00 2001 From: kabachuha Date: Wed, 6 Dec 2023 15:07:41 +0300 Subject: [PATCH 2/7] use regex for base64 prefix subbing --- extensions/openai/completions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index ec8bc69f2f..01d4eb206f 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -21,6 +21,7 @@ from io import BytesIO import base64 import requests +import re class LogitsBiasProcessor(LogitsProcessor): def __init__(self, logit_bias={}): @@ -145,7 +146,7 @@ def convert_history(history): if "image_url" in entry: image_url = entry['image_url'] if "base64" in image_url: - image_url = image_url.split('base64')[1] + image_url = re.sub('^data:image/.+;base64,', '', image_url) img = Image.open(BytesIO(base64.b64decode(image_url))) else: try: From 1442cf2d651a822cc60be574ebb1a541c96a264b Mon Sep 17 00:00:00 2001 From: kabachuha Date: Fri, 8 Dec 2023 16:10:46 +0300 Subject: [PATCH 3/7] image format fixup --- extensions/openai/completions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 01d4eb206f..a632a79cb8 100644 --- 
a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -155,7 +155,7 @@ def convert_history(history): except Exception as e: raise 'Image cannot be loaded from the URL!' buffered = BytesIO() - img.save(buffered, format="PNG") + img.save(buffered, format="JPEG") img_str = base64.b64encode(buffered.getvalue()).decode('utf-8') content = f'' else: From 7f81db21eccec7ccce9eaf6e866cb228bbba0753 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 22 Dec 2023 17:24:08 -0800 Subject: [PATCH 4/7] Very minor style changes --- extensions/openai/completions.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index a632a79cb8..326ed853ae 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -1,10 +1,15 @@ +import base64 import copy +import re import time from collections import deque +from io import BytesIO +import requests import tiktoken import torch import torch.nn.functional as F +from PIL import Image from transformers import LogitsProcessor, LogitsProcessorList from extensions.openai.errors import InvalidRequestError @@ -17,11 +22,7 @@ ) from modules.presets import load_preset_memoized from modules.text_generation import decode, encode, generate_reply -from PIL import Image -from io import BytesIO -import base64 -import requests -import re + class LogitsBiasProcessor(LogitsProcessor): def __init__(self, logit_bias={}): @@ -152,14 +153,16 @@ def convert_history(history): try: my_res = requests.get(image_url) img = Image.open(BytesIO(my_res.content)) - except Exception as e: + except Exception: raise 'Image cannot be loaded from the URL!' 
+ buffered = BytesIO() img.save(buffered, format="JPEG") img_str = base64.b64encode(buffered.getvalue()).decode('utf-8') content = f'' else: content = entry["content"] + role = entry["role"] if role == "user": From cca1f0fcbbf95a89e964e9b81f65f3d28ba3c0e5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 22 Dec 2023 17:34:17 -0800 Subject: [PATCH 5/7] Add @kabachuha's examples to the documentation --- extensions/multimodal/README.md | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/extensions/multimodal/README.md b/extensions/multimodal/README.md index 87183587c8..5e874af36c 100644 --- a/extensions/multimodal/README.md +++ b/extensions/multimodal/README.md @@ -67,6 +67,52 @@ This extension uses the following parameters (from `settings.json`): ## Usage through API +### Chat completions endpoint + +#### With an image URL + +```shell +curl http://127.0.0.1:5000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + { + "role": "user", + "image_url": "https://avatars.githubusercontent.com/u/112222186?v=4" + }, + { + "role": "user", + "content": "What is unusual about this image?" + } + ] + }' +``` + +#### With a Base64 image + +``` +import base64 +import json +import requests + +img = open('image.jpg', 'rb') +img_bytes = img.read() +img_base64 = base64.b64encode(img_bytes).decode('utf-8') +data = { "messages": [ + { + "role": "user", + "image_url": f"data:image/jpeg;base64,{img_base64}" + }, + { + "role": "user", + "content": "what is unusual about this image?" + } + ] +} +response = requests.post('http://127.0.0.1:5000/v1/chat/completions', json=data) +print(response.text) +``` + You can run the multimodal inference through API, by inputting the images to prompt. Images are embedded like so: `f'<img src="data:image/jpeg;base64,{img_str}">'`, where `img_str` is base-64 jpeg data. Note that you will need to launch `server.py` with the arguments `--api --extensions multimodal`. 
Python example: From 5e0b532098888a61669d8123f929a857fd042d96 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 22 Dec 2023 17:36:24 -0800 Subject: [PATCH 6/7] Update doc --- extensions/multimodal/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/extensions/multimodal/README.md b/extensions/multimodal/README.md index 5e874af36c..b176eca3d6 100644 --- a/extensions/multimodal/README.md +++ b/extensions/multimodal/README.md @@ -90,7 +90,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \ #### With a Base64 image -``` +```python import base64 import json import requests @@ -115,6 +115,8 @@ print(response.text) You can run the multimodal inference through API, by inputting the images to prompt. Images are embedded like so: `f'<img src="data:image/jpeg;base64,{img_str}">'`, where `img_str` is base-64 jpeg data. Note that you will need to launch `server.py` with the arguments `--api --extensions multimodal`. +### Completions endpoint + Python example: ```Python From ea1d6c7066eeb182cf7a92a541a69c12e82aca24 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 22 Dec 2023 17:38:35 -0800 Subject: [PATCH 7/7] Add space --- extensions/openai/completions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 22fda34a4a..26017f372c 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -205,6 +205,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) - raise InvalidRequestError(message="messages: missing role", param='messages') elif m['role'] == 'function': raise InvalidRequestError(message="role: function is not supported.", param='messages') + if 'content' not in m and "image_url" not in m: raise InvalidRequestError(message="messages: missing content", param='messages')