stable_main.py
import itertools

import cv2
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm

from diffusers import (
    AutoencoderKL,
    AutoPipelineForImage2Image,
    ControlNetModel,
    DiffusionPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLImg2ImgPipeline,
    UniPCMultistepScheduler,
)
from diffusers.utils import load_image
# Sweep settings: the integer ranges are scaled to floats inside the loop
# (strength -> 0.33..0.99, eta -> 0.0..1.0); guidance_scale is used as-is.
num_images_per_prompt: int = 1
num_inference_steps: int = 35
strengths = list(range(3, 10))          # scaled by 0.11 in the loop
conditioning_scale = list(range(1, 9))  # reserved for the ControlNet path (unused below)
guidance_scales = list(range(2, 17))
eta_list = list(range(0, 11))           # scaled by 0.1 in the loop
size_factor: float = 0.99               # currently unused
combined_list = list(itertools.product(eta_list, strengths, guidance_scales))
device = "cuda"
generator = torch.Generator(device=device).manual_seed(2257817932)  # fixed seed for reproducibility
# model_id_or_path = "acheong08/f222"
# model_id_or_path = "runwayml/stable-diffusion-v1-5"
# model_id_or_path = "CompVis/stable-diffusion-v1-4"
model_id_or_path = "stabilityai/stable-diffusion-xl-refiner-1.0"
# model_id_or_path = "stabilityai/stable-diffusion-xl-base-1.0"

# Note: this SD 1.5 ControlNet is loaded but never attached to the SDXL pipeline
# below (the two architectures are incompatible); see the ControlNet sketch further down.
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)

# vae = AutoencoderKL.from_pretrained(model_id_or_path, subfolder="vae", torch_dtype=torch.float16, variant="fp16", use_safetensors=True).to(device)
# pipe = DiffusionPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe = AutoPipelineForImage2Image.from_pretrained(model_id_or_path, torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
pipe = pipe.to(device)
# pipe.enable_model_cpu_offload()  # alternative to .to(device) when VRAM is tight
# Load the init image and scale it to a fixed height, preserving the aspect ratio.
init_image = Image.open("assets/images/athena_merge.jpg").convert("RGB")
width, height = init_image.size
ratio = width / height
new_height = 900
new_width = int(ratio * new_height)
print(f"Resizing init image to {new_width}x{new_height}")
init_image = init_image.resize((new_width, new_height))
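# Hedged aside: diffusion VAEs downsample by a factor of 8, so dimensions that
# are not multiples of 8 may be silently rounded by the pipeline's image
# processor. A minimal sketch that snaps the resize to safe dimensions -- an
# optional addition, not part of the original script:
# new_width -= new_width % 8
# new_height -= new_height % 8
# init_image = init_image.resize((new_width, new_height))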
# Build a Canny edge map from the init image (for the ControlNet experiments).
image = np.array(init_image)
low_threshold = 100
high_threshold = 200
canny_image = cv2.Canny(image, low_threshold, high_threshold)
canny_image = canny_image[:, :, None]
canny_image = np.concatenate([canny_image, canny_image, canny_image], axis=2)  # single channel -> RGB
canny_image = Image.fromarray(canny_image)
canny_image.save("output/canny_images_resize.png")
init_image.save("output/init_images_resize.png")
# prompt = "An astronaut riding a green horse"
# url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-sdxl-init.png"
# init_image = load_image(url)
prompt = "Astronaut in a war zone, guns in hands, detailed, 8k, best quality, high quality"
neg_prompt = "ugly, deformed, disfigured, poor details, bad anatomy, free hair, mutant, cropped, worst quality, low quality, jpeg artifacts, signature, watermark, username, blurry, made by children, caricature, ugly, boring, sketch, lacklustre, repetitive, cropped, (long neck), body horror, out of frame, mutilated, tiled, frame, border, porcelain skin"
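# Hedged sketch: the Canny map and the ControlNet loaded above are never used by
# the SDXL refiner pipeline below. One way to actually apply them -- assuming the
# SD 1.5 base model, since lllyasviel/sd-controlnet-canny targets SD 1.5 -- would be:
#
# controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
# ).to(device)
# controlnet_pipe.scheduler = UniPCMultistepScheduler.from_config(controlnet_pipe.scheduler.config)
# edge_guided = controlnet_pipe(
#     prompt,
#     image=canny_image,                      # the edge map conditions the generation
#     num_inference_steps=num_inference_steps,
#     controlnet_conditioning_scale=0.8,      # illustrative value from conditioning_scale's range
#     generator=generator,
# ).images[0]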
# Sweep every (eta, strength, guidance_scale) combination and save one image each.
for eta, strength, guidance_scale in tqdm(combined_list, total=len(combined_list)):
    # pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
    strength = 0.11 * strength  # map 3..9 -> 0.33..0.99
    eta = 0.1 * eta             # map 0..10 -> 0.0..1.0
    print(f"kot_inpaint_{round(eta, 4)}_{round(strength, 4)}_{round(guidance_scale, 4)}.png")
    # generate one image for this parameter combination
    result = pipe(
        prompt=prompt,
        negative_prompt=neg_prompt,
        image=init_image,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=num_images_per_prompt,
        strength=strength,
        generator=generator,
        guidance_scale=guidance_scale,
        eta=eta,
    ).images[0]
    filename: str = f"/mnt/d/Data/gen_images/kot_inpaint_{round(eta, 4)}_{round(strength, 4)}_{round(guidance_scale, 4)}.png"
    result.save(filename)
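
# Hedged sketch (not in the original script): the sweep above writes over a
# thousand files, so a resume guard that computes the target path at the top of
# the loop and skips existing outputs avoids repeating work after an interruption:
#
# import os.path
# for eta, strength, guidance_scale in combined_list:
#     strength, eta = 0.11 * strength, 0.1 * eta
#     filename = f"/mnt/d/Data/gen_images/kot_inpaint_{round(eta, 4)}_{round(strength, 4)}_{round(guidance_scale, 4)}.png"
#     if os.path.exists(filename):
#         continue  # already generated in a previous run
#     ...  # pipeline call as above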