Skip to content

Commit

Permalink
feat: add multiple images (or in-context learning) conversation examp…
Browse files Browse the repository at this point in the history
…les (#47)

Co-authored-by: Bo Liu <[email protected]>
  • Loading branch information
StevenLiuWen and Benjamin-eecs authored Apr 16, 2024
1 parent 3c02b24 commit 9bb02cc
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 5 deletions.
68 changes: 68 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Lint workflow: checks out the repository, installs it together with its
# `lint` extra, and then runs the repository's lint/format make targets.
name: Lint

on:
  push:
    branches:
      - main
  pull_request:
  # Allow to trigger the workflow manually
  workflow_dispatch:

permissions:
  contents: read

concurrency:
  # One running workflow per ref; superseded pull-request runs are cancelled.
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

env:
  # NOTE(review): no step in this workflow references CUDA_VERSION directly —
  # confirm something in the build still reads it before keeping it.
  CUDA_VERSION: "11.7"

jobs:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: "recursive"
          fetch-depth: 1

      - name: Set up Python 3.9
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
          update-environment: true

      - name: Upgrade pip
        run: |
          python -m pip install --upgrade pip setuptools wheel

      # Installs this repository in editable mode with the `lint` extra.
      # torch/numpy/pybind11 are installed first because the editable install
      # runs with --no-build-isolation and therefore uses the current environment.
      - name: Install package with lint extras
        env:
          # NOTE(review): USE_FP16 / TORCH_CUDA_ARCH_LIST look like build flags
          # carried over from another project's workflow — confirm this
          # package's build actually consumes them.
          USE_FP16: "OFF"
          TORCH_CUDA_ARCH_LIST: "Auto"
        run: |
          python -m pip install torch numpy pybind11
          python -m pip install -vvv --no-build-isolation --editable '.[lint]'

      - name: pre-commit
        run: |
          make pre-commit

      - name: ruff
        run: |
          make ruff

      - name: flake8
        run: |
          make flake8

      - name: isort and black
        run: |
          make py-format

      - name: addlicense
        run: |
          make addlicense
26 changes: 21 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,34 @@ tokenizer = vl_chat_processor.tokenizer
vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()

## single image conversation example
conversation = [
{
"role": "User",
"content": "<image_placeholder>Describe each stage of this image.",
"images": ["./images/training_pipelines.jpg"]
"images": ["./images/training_pipelines.jpg"],
},
{
"role": "Assistant",
"content": ""
}
{"role": "Assistant", "content": ""},
]

## multiple images (or in-context learning) conversation example
# conversation = [
# {
# "role": "User",
# "content": "<image_placeholder>A dog wearing nothing in the foreground, "
# "<image_placeholder>a dog wearing a santa hat, "
# "<image_placeholder>a dog wearing a wizard outfit, and "
# "<image_placeholder>what's the dog wearing?",
# "images": [
# "images/dog_a.png",
# "images/dog_b.png",
# "images/dog_c.png",
# "images/dog_d.png",
# ],
# },
# {"role": "Assistant", "content": ""}
# ]

# load images and prepare for inputs
pil_images = load_pil_images(conversation)
prepare_inputs = vl_chat_processor(
Expand Down
Binary file added images/dog_a.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/dog_b.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/dog_c.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/dog_d.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 18 additions & 0 deletions inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
)
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()

# single image conversation example
conversation = [
{
"role": "User",
Expand All @@ -42,6 +43,23 @@
{"role": "Assistant", "content": ""},
]

# multiple images (or in-context learning) conversation example
# conversation = [
# {
# "role": "User",
# "content": "<image_placeholder>A dog wearing nothing in the foreground, "
# "<image_placeholder>a dog wearing a santa hat, "
# "<image_placeholder>a dog wearing a wizard outfit, and "
# "<image_placeholder>what's the dog wearing?",
# "images": [
# "images/dog_a.png",
# "images/dog_b.png",
# "images/dog_c.png",
# "images/dog_d.png",
# ],
# },
# {"role": "Assistant", "content": ""}
# ]

# load images and prepare for inputs
pil_images = load_pil_images(conversation)
Expand Down
14 changes: 14 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ gradio = [
"markdown==3.4.1",
"SentencePiece==0.1.96"
]
# Optional dependency group with the linting/formatting toolchain.
# The Lint CI workflow installs it via `pip install --editable '.[lint]'`.
lint = [
"isort",
"black[jupyter] >= 22.6.0",
"pylint[spelling] >= 2.15.0",
"flake8",
"flake8-bugbear",
"flake8-comprehensions",
"flake8-docstrings",
"flake8-pyi",
"flake8-simplify",
"ruff",
"pyenchant",
"pre-commit",
]

[tool.setuptools]
packages = {find = {exclude = ["images"]}}

0 comments on commit 9bb02cc

Please sign in to comment.