Merge branch 'main' into table-transformer-enabling

huggingface · Jul 28, 2024 · 6da4a96 · 6da4a96
2 parents 726a2d0 + a246660
commit 6da4a96
Show file tree

Hide file tree

Showing 21 changed files with 917 additions and 6 deletions.
diff --git a/Makefile b/Makefile
@@ -61,6 +61,11 @@ fast_tests_feature_extraction:
 fast_test_videomae:
 	python -m pip install .[tests]
 	python -m pytest tests/test_video_mae.py
+
+# Run unit and integration tests related to object detection
+fast_tests_object_detection:
+	python -m pip install .[tests]
+	python -m pytest tests/test_object_detection.py
 
 # Run integration tests related to table transformers
 fast_tests_table_transformers:

diff --git a/README.md b/README.md
@@ -217,6 +217,7 @@ The following model architectures, tasks and device distributions have been vali
 | Segment Anything Model |   | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | VideoMAE | | <div style="text-align:left"><li>Single card</li></div> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/main/examples/video-classification)</li> |
 | TableTransformer |   | <div style="text-align:left"><li>Single card</li></div> | <li>[table object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/table-detection) </li> |
+| DETR |   | <div style="text-align:left"><li>Single card</li></div> | <li>[object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/object-detection)</li> |
 
 </div>
 

diff --git a/docs/source/index.mdx b/docs/source/index.mdx
@@ -75,6 +75,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | SAM          |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | VideoMAE |          | <div style="text-align:left"><li>Single card</li></div> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/main/examples/video-classification)</li> |
 | TableTransformer |       | <div style="text-align:left"><li>Single card</li></div> | <li>[table object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/table-detection)</li> |
+| DETR         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/object-detection)</li> |
 
 - Diffusers
 

diff --git a/docs/source/package_reference/stable_diffusion_pipeline.mdx b/docs/source/package_reference/stable_diffusion_pipeline.mdx
@@ -75,3 +75,11 @@ It inherits from the `GaudiDiffusionPipeline` class that is the parent to any ki
 
 [[autodoc]] diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale.GaudiStableDiffusionUpscalePipeline
     - __call__
+
+
+# GaudiDDPMPipeline
+
+The `GaudiDDPMPipeline` enables unconditional image generation on HPUs. Its API is similar to that of the regular `DiffusionPipeline`.
+It shares a common parent class, `GaudiDiffusionPipeline`, with the other existing Gaudi pipelines. It supports both the DDPM and DDIM schedulers.
+It is recommended to use the optimized scheduler, `GaudiDDIMScheduler`, to obtain the best performance and image outputs.
+
diff --git a/examples/object-detection/README.md b/examples/object-detection/README.md
@@ -0,0 +1,34 @@
+<!---
+Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Object Detection Example
+
+This folder contains an example script that demonstrates how to use DETR to run an object detection task on the Gaudi platform.
+
+## Single-HPU inference
+
+```bash
+python3 run_example.py \
+	--model_name_or_path facebook/detr-resnet-101 \
+	--image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \
+	--use_hpu_graphs \
+	--bf16 \
+	--print_result
+```
+
+Models that have been validated:
+  - [facebook/detr-resnet-101](https://huggingface.co/facebook/detr-resnet-101)
+  - [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50)
diff --git a/examples/object-detection/run_example.py b/examples/object-detection/run_example.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Copied from https://huggingface.co/docs/transformers/model_doc/owlvit
+
+import argparse
+import time
+
+import habana_frameworks.torch as ht
+import requests
+import torch
+from PIL import Image
+from transformers import AutoProcessor, DetrForObjectDetection
+
+from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
+
+
+if __name__ == "__main__":
+    # Benchmark DETR object-detection inference on a single HPU: parse the CLI
+    # options, load the checkpoint, run warmup and timed iterations, optionally
+    # print the detections, then report latency statistics.
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--model_name_or_path",
+        default="facebook/detr-resnet-101",
+        type=str,
+        help="Path of the pre-trained model",
+    )
+    parser.add_argument(
+        "--image_path",
+        default="http://images.cocodataset.org/val2017/000000039769.jpg",
+        type=str,
+        help='Path of the input image. Should be a single string (eg: --image_path "URL")',
+    )
+    parser.add_argument(
+        "--use_hpu_graphs",
+        action="store_true",
+        help="Whether to use HPU graphs or not. Using HPU graphs should give better latencies.",
+    )
+    parser.add_argument(
+        "--bf16",
+        action="store_true",
+        help="Whether to use bf16 precision for object detection.",
+    )
+    parser.add_argument(
+        "--detect_threshold",
+        type=float,
+        default=0.9,
+        help="Detection threshold score (otherwise dismissed)",
+    )
+    parser.add_argument(
+        "--print_result",
+        action="store_true",
+        help="Whether to print the detection results.",
+    )
+
+    parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.")
+    parser.add_argument(
+        "--n_iterations", type=int, default=10, help="Number of inference iterations for benchmarking."
+    )
+
+    args = parser.parse_args()
+
+    # The average latency divides by n_iterations, and the printed detections come
+    # from the last timed forward pass, so at least one timed iteration is required.
+    if args.n_iterations < 1:
+        parser.error("--n_iterations must be at least 1")
+
+    adapt_transformers_to_gaudi()
+
+    # Load the checkpoint requested on the command line (previously the default
+    # checkpoint name was hard-coded here and --model_name_or_path was ignored).
+    # The revision tag is specified to avoid the timm dependency.
+    # NOTE(review): assumes the chosen checkpoint publishes a "no_timm" revision - confirm.
+    processor = AutoProcessor.from_pretrained(args.model_name_or_path, revision="no_timm")
+    model = DetrForObjectDetection.from_pretrained(args.model_name_or_path, revision="no_timm")
+
+    image = Image.open(requests.get(args.image_path, stream=True).raw)
+
+    inputs = processor(images=image, return_tensors="pt").to("hpu")
+    model.to("hpu")
+
+    if args.use_hpu_graphs:
+        model = ht.hpu.wrap_in_hpu_graph(model)
+
+    autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16)
+
+    with torch.no_grad(), autocast:
+        # Warmup iterations are executed but excluded from the reported latency.
+        for _ in range(args.warmup):
+            inputs = processor(images=image, return_tensors="pt").to("hpu")
+            outputs = model(**inputs)
+            torch.hpu.synchronize()
+
+        total_model_time = 0
+        for _ in range(args.n_iterations):
+            # Preprocessing happens before the timer starts, so only the forward
+            # pass plus the device synchronization is measured.
+            inputs = processor(images=image, return_tensors="pt").to("hpu")
+            model_start_time = time.perf_counter()
+            outputs = model(**inputs)
+            torch.hpu.synchronize()
+            model_end_time = time.perf_counter()
+            total_model_time = total_model_time + (model_end_time - model_start_time)
+
+    if args.print_result:
+        # PIL reports (width, height); reverse it to (height, width) for post-processing.
+        target_sizes = torch.tensor([image.size[::-1]])
+        results = processor.post_process_object_detection(
+            outputs, target_sizes=target_sizes, threshold=args.detect_threshold
+        )[0]
+
+        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+            box = [round(i, 2) for i in box.tolist()]
+            print(
+                f"Detected {model.config.id2label[label.item()]} with confidence "
+                f"{round(score.item(), 3)} at location {box}"
+            )
+
+    # Latency report. Kept inside the __main__ guard because it reads `args` and
+    # `total_model_time`, which only exist when the file is run as a script.
+    tot_stat = f"Total latency (ms): {total_model_time * 1000} (for n_iterations={args.n_iterations}) "
+    avg_stat = f"Average latency (ms): {total_model_time * 1000 / args.n_iterations} (per iteration) "
+    separator = "-" * max(len(tot_stat), len(avg_stat))
+    print()
+    print("Stats:")
+    print(separator)
+    print(tot_stat)
+    print(avg_stat)
+    print(separator)
diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
@@ -256,7 +256,7 @@ python text_to_image_generation.py \
 
 > Please note: there is a regression with "--guidance_scale 0.0" for the latest release.
 
- 
+
 ### ControlNet
 
 ControlNet was introduced in [Adding Conditional Control to Text-to-Image Diffusion Models ](https://huggingface.co/papers/2302.05543) by Lvmin Zhang and Maneesh Agrawala.
@@ -508,6 +508,7 @@ python text_to_image_generation.py \
 ```
 
 ### Stable Diffusion XL Inpainting
+
 ```bash
 python text_to_image_generation.py \
     --model_name_or_path  diffusers/stable-diffusion-xl-1.0-inpainting-0.1\
@@ -523,3 +524,22 @@ python text_to_image_generation.py \
     --use_hpu_graphs \
     --gaudi_config Habana/stable-diffusion
 ```
+
+
+### Unconditional Image Generation Example
+
+Here is how to perform unconditional image generation on Gaudi/HPU.
+
+The original unconditional image generation pipeline is described here: [Unconditional Image Generation](https://huggingface.co/docs/diffusers/using-diffusers/unconditional_image_generation)
+
+```bash
+python3 unconditional_image_generation.py \
+    --model_name_or_path "google/ddpm-ema-celebahq-256" \
+    --batch_size 16 \
+    --use_habana \
+    --use_gaudi_ddim_scheduler \
+    --use_hpu_graphs \
+    --bf16 \
+    --save_outputs \
+    --output_dir "/tmp/"
+```
diff --git a/examples/stable-diffusion/unconditional_image_generation.py b/examples/stable-diffusion/unconditional_image_generation.py
@@ -0,0 +1,113 @@
+import argparse
+import logging
+import sys
+
+from diffusers import DDPMScheduler
+from transformers.utils import check_min_version
+
+from optimum.habana.diffusers import GaudiDDIMScheduler, GaudiDDPMPipeline
+from optimum.habana.transformers.gaudi_configuration import GaudiConfig
+
+
+logger = logging.getLogger(__name__)
+
+# `check_optimum_habana_min_version` may not be importable from the installed
+# optimum.habana; in that case fall back to a stub that accepts any arguments
+# and performs no check.
+try:
+    from optimum.habana.utils import check_optimum_habana_min_version
+except ImportError:
+
+    def check_optimum_habana_min_version(*a, **b):
+        return ()
+
+
+# Version checks run at module import time, before main() parses any arguments.
+check_min_version("4.37.0")
+check_optimum_habana_min_version("1.10.4")
+
+# Setup logging: records go to stdout, and this module's logger emits INFO and above.
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+    datefmt="%m/%d/%Y %H:%M:%S",
+    handlers=[logging.StreamHandler(sys.stdout)],
+)
+logger.setLevel(logging.INFO)
+
+
+def main():
+    """Generate images unconditionally with `GaudiDDPMPipeline` and optionally save them.
+
+    Parses the command-line options, builds the scheduler selected by
+    `--use_gaudi_ddim_scheduler` (`DDPMScheduler` otherwise), runs the pipeline once
+    and, when `--save_outputs` is set, writes every generated image to `--output_dir`
+    as `unconditional_image_<index>.jpg`.
+    """
+    # Local import: only this function needs pathlib.
+    from pathlib import Path
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_name_or_path",
+        default="google/ddpm-ema-celebahq-256",
+        type=str,
+        help="Path of the pre-trained unconditional image generation model",
+    )
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=16,
+        help="Batch size for the task.",
+    )
+    parser.add_argument(
+        "--num_inference_steps", type=int, default=1000, help="Number of inference steps for the denoising UNet."
+    )
+    parser.add_argument(
+        "--use_gaudi_ddim_scheduler",
+        action="store_true",
+        help="Whether to use the Gaudi optimized DDIM scheduler. The default is DDPMScheduler",
+    )
+    parser.add_argument(
+        "--use_habana",
+        action="store_true",
+        help="Whether to use HPU for computations.",
+    )
+    parser.add_argument(
+        "--use_hpu_graphs",
+        action="store_true",
+        help="Whether to use HPU graphs or not. Using HPU graphs should give better latencies.",
+    )
+    parser.add_argument(
+        "--bf16",
+        action="store_true",
+        help="Whether to use bf16 precision for image generation.",
+    )
+    parser.add_argument(
+        "--save_outputs",
+        action="store_true",
+        help="Whether to save the generated images to jpg.",
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="/tmp/",
+        help="Where to save the generated images. The default is /tmp/.",
+    )
+
+    args = parser.parse_args()
+    model_name = args.model_name_or_path
+
+    if args.use_gaudi_ddim_scheduler:
+        scheduler = GaudiDDIMScheduler.from_pretrained(model_name)
+    else:
+        scheduler = DDPMScheduler.from_pretrained(model_name)
+
+    # --bf16 is forwarded to the Gaudi configuration as `use_torch_autocast`.
+    gaudi_kwargs = {
+        "use_torch_autocast": args.bf16,
+    }
+    gaudi_config = GaudiConfig(**gaudi_kwargs)
+
+    kwargs = {
+        "scheduler": scheduler,
+        "use_habana": args.use_habana,
+        "use_hpu_graphs": args.use_hpu_graphs,
+        "gaudi_config": gaudi_config,
+    }
+
+    pipeline = GaudiDDPMPipeline.from_pretrained(model_name, **kwargs)
+    output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps)
+
+    # Saving is gated on --save_outputs. The previous check on `args.output_dir` was
+    # always true because of its "/tmp/" default, which made the flag a no-op.
+    if args.save_outputs:
+        output_dir = Path(args.output_dir)
+        # Create the directory up front so saving also works for a path that does not exist yet.
+        output_dir.mkdir(parents=True, exist_ok=True)
+        logger.info(f"Generating outputs to {args.output_dir}")
+        for i, image in enumerate(output.images):
+            # Joining with Path works whether or not --output_dir ends with a separator.
+            image.save(output_dir / f"unconditional_image_{i}.jpg")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py
@@ -427,12 +427,12 @@ def generate(size=None, reduce_recompile=False):
         t0 = time.perf_counter()
         # The first three iterations take longer because of graph compilation
         if dyn_prompt_lens is None or len(set(dyn_prompt_lens)) == 1:
-            for _ in range(args.warmup):
+            for i in range(args.warmup):
                 if dyn_prompt_lens is None:
-                    print("Warming up", flush=True)
+                    print(f"Warming up iteration {i+1}/{args.warmup}", flush=True)
                     generate(None, args.reduce_recompile)
                 else:
-                    print("Warming up for shape,", dyn_prompt_lens[0], flush=True)
+                    print(f"Warming up for shape {dyn_prompt_lens[0]} iteration {i+1}/{args.warmup}", flush=True)
                     generate(dyn_prompt_lens[0], args.reduce_recompile)
         else:
             if args.bucket_size > 0:
@@ -444,10 +444,10 @@ def rounder(x):
 
                 min_prompt_len = rounder(mn)
                 max_sentence_len = rounder(mx)
-                for _ in range(args.warmup):
+                for i in range(args.warmup):
                     lst = list(range(min_prompt_len, max_sentence_len + 1, args.bucket_size))
                     for sz in lst:
-                        print("Warming up for shape,", sz - 1, flush=True)
+                        print(f"Warming up for shape {sz - 1} iteration {i+1}/{args.warmup}", flush=True)
                         generate(sz - 1, args.reduce_recompile)
         torch_hpu.synchronize()
         compilation_duration = time.perf_counter() - t0

diff --git a/optimum/habana/diffusers/__init__.py b/optimum/habana/diffusers/__init__.py
@@ -1,5 +1,6 @@
 from .pipelines.auto_pipeline import AutoPipelineForInpainting, AutoPipelineForText2Image
 from .pipelines.controlnet.pipeline_controlnet import GaudiStableDiffusionControlNetPipeline
+from .pipelines.ddpm.pipeline_ddpm import GaudiDDPMPipeline
 from .pipelines.pipeline_utils import GaudiDiffusionPipeline
 from .pipelines.stable_diffusion.pipeline_stable_diffusion import GaudiStableDiffusionPipeline
 from .pipelines.stable_diffusion.pipeline_stable_diffusion_image_variation import (

diff --git a/optimum/habana/diffusers/models/__init__.py b/optimum/habana/diffusers/models/__init__.py
@@ -1 +1,2 @@
+from .unet_2d import gaudi_unet_2d_model_forward
 from .unet_2d_condition import gaudi_unet_2d_condition_model_forward