From 74eaf024847b99a3edbc1fb90edb9d9234c4b3b8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 11 Aug 2024 01:31:53 +0200
Subject: [PATCH] feat(diffusers): support flux models (#3129)

* feat(diffusers): support flux models

This adds support for FLUX models.
For instance: https://huggingface.co/black-forest-labs/FLUX.1-dev

Signed-off-by: Ettore Di Giacinto

* feat(diffusers): support FluxTransformer2DModel

Signed-off-by: Ettore Di Giacinto

* Small fixups

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: Ettore Di Giacinto
---
 backend/python/diffusers/backend.py           | 39 +++++++++++++++++--
 backend/python/diffusers/requirements-cpu.txt |  3 +-
 .../diffusers/requirements-cublas11.txt       |  3 +-
 .../diffusers/requirements-cublas12.txt       |  3 +-
 .../python/diffusers/requirements-hipblas.txt |  1 +
 .../python/diffusers/requirements-intel.txt   |  3 +-
 6 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py
index a348d290e7c6..8f42084822f2 100755
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -18,13 +18,13 @@
 import grpc
 
 from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
-    EulerAncestralDiscreteScheduler
+    EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image, export_to_video
 from compel import Compel, ReturnedEmbeddingsType
-
-from transformers import CLIPTextModel
+from optimum.quanto import freeze, qfloat8, quantize
+from transformers import CLIPTextModel, T5EncoderModel
 from safetensors.torch import load_file
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -163,6 +163,8 @@ def LoadModel(self, request, context):
         modelFile = request.Model
 
         self.cfg_scale = 7
+        self.PipelineType = request.PipelineType
+
         if request.CFGScale != 0:
             self.cfg_scale = request.CFGScale
 
@@ -244,6 +246,30 @@ def LoadModel(self, request, context):
                     torch_dtype=torchType,
                     use_safetensors=True,
                     variant=variant)
+            elif request.PipelineType == "FluxPipeline":
+                self.pipe = FluxPipeline.from_pretrained(
+                    request.Model,
+                    torch_dtype=torch.bfloat16)
+                if request.LowVRAM:
+                    self.pipe.enable_model_cpu_offload()
+            elif request.PipelineType == "FluxTransformer2DModel":
+                dtype = torch.bfloat16
+                # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
+                bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
+
+                transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
+                quantize(transformer, weights=qfloat8)
+                freeze(transformer)
+                text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
+                quantize(text_encoder_2, weights=qfloat8)
+                freeze(text_encoder_2)
+
+                self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
+                self.pipe.transformer = transformer
+                self.pipe.text_encoder_2 = text_encoder_2
+
+                if request.LowVRAM:
+                    self.pipe.enable_model_cpu_offload()
 
         if CLIPSKIP and request.CLIPSkip != 0:
             self.clip_skip = request.CLIPSkip
@@ -399,6 +425,13 @@ def GenerateImage(self, request, context):
                 request.seed
             )
 
+        if self.PipelineType == "FluxPipeline":
+            kwargs["max_sequence_length"] = 256
+
+        if self.PipelineType == "FluxTransformer2DModel":
+            kwargs["output_type"] = "pil"
+            kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
+
         if self.img2vid:
             # Load the conditioning image
             image = load_image(request.src)
diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt
index e46a53e5098b..235bb57e3d2f 100644
--- a/backend/python/diffusers/requirements-cpu.txt
+++ b/backend/python/diffusers/requirements-cpu.txt
@@ -5,4 +5,5 @@ accelerate
 compel
 peft
 sentencepiece
-torch
\ No newline at end of file
+torch
+optimum-quanto
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt
index df28b8214b58..40e718cb1f7c 100644
--- a/backend/python/diffusers/requirements-cublas11.txt
+++ b/backend/python/diffusers/requirements-cublas11.txt
@@ -6,4 +6,5 @@ transformers
 accelerate
 compel
 peft
-sentencepiece
\ No newline at end of file
+sentencepiece
+optimum-quanto
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt
index b0685a6206d6..3bcc53972aa8 100644
--- a/backend/python/diffusers/requirements-cublas12.txt
+++ b/backend/python/diffusers/requirements-cublas12.txt
@@ -5,4 +5,5 @@ transformers
 accelerate
 compel
 peft
-sentencepiece
\ No newline at end of file
+sentencepiece
+optimum-quanto
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt
index fc9ea3b43c21..17cf72491555 100644
--- a/backend/python/diffusers/requirements-hipblas.txt
+++ b/backend/python/diffusers/requirements-hipblas.txt
@@ -8,3 +8,4 @@ accelerate
 compel
 peft
 sentencepiece
+optimum-quanto
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt
index 77f9e6741ccd..1cc2e2a2bdb6 100644
--- a/backend/python/diffusers/requirements-intel.txt
+++ b/backend/python/diffusers/requirements-intel.txt
@@ -10,4 +10,5 @@ transformers
 accelerate
 compel
 peft
-sentencepiece
\ No newline at end of file
+sentencepiece
+optimum-quanto
\ No newline at end of file
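
For reference, here is a minimal standalone sketch of the FluxTransformer2DModel path the patch adds: it loads a single-file FLUX transformer checkpoint, quantizes it and the T5 text encoder to qfloat8 with optimum-quanto, assembles a FluxPipeline around the quantized parts, and generates with the same kwargs GenerateImage sets for this pipeline type. The checkpoint path "./flux1-dev.safetensors" and the prompt are placeholder assumptions, not part of the patch.

import os

import torch
from diffusers import FluxPipeline, FluxTransformer2DModel
from optimum.quanto import freeze, qfloat8, quantize  # pip install optimum-quanto
from transformers import T5EncoderModel

dtype = torch.bfloat16
# Same default repo the patch reads from the BFL_REPO environment variable.
bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")

# Load the transformer from a single .safetensors checkpoint, then quantize
# its weights to 8-bit floats and freeze them.
# NOTE: "./flux1-dev.safetensors" is a placeholder path, not part of the patch.
transformer = FluxTransformer2DModel.from_single_file("./flux1-dev.safetensors", torch_dtype=dtype)
quantize(transformer, weights=qfloat8)
freeze(transformer)

# The T5 text encoder is the other memory-heavy component; quantize it too.
text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)

# Build the pipeline without those two components, then attach the quantized ones.
pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
pipe.transformer = transformer
pipe.text_encoder_2 = text_encoder_2
pipe.enable_model_cpu_offload()  # what the patch does when request.LowVRAM is set

# Generate with the kwargs the patch sets for this pipeline type.
image = pipe(
    "a photo of a forest with mist",  # placeholder prompt
    max_sequence_length=256,
    output_type="pil",
    generator=torch.Generator("cpu").manual_seed(0),
).images[0]
image.save("flux.png")

Quantizing both the transformer and the T5 encoder to qfloat8 is what makes the 12B-parameter FLUX.1 checkpoint practical on consumer GPUs; combined with enable_model_cpu_offload() it should run in roughly 16 GB of VRAM.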