Skip to content

Commit fcbed3f

Browse files
authored
Fix SDXL Inpainting from single file with Refiner Model (huggingface#6147)
* update * update * update
1 parent b98b314 commit fcbed3f

2 files changed

Lines changed: 63 additions & 71 deletions

File tree

src/diffusers/loaders/single_file.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -169,10 +169,12 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
169169
load_safety_checker = kwargs.pop("load_safety_checker", True)
170170
prediction_type = kwargs.pop("prediction_type", None)
171171
text_encoder = kwargs.pop("text_encoder", None)
172+
text_encoder_2 = kwargs.pop("text_encoder_2", None)
172173
vae = kwargs.pop("vae", None)
173174
controlnet = kwargs.pop("controlnet", None)
174175
adapter = kwargs.pop("adapter", None)
175176
tokenizer = kwargs.pop("tokenizer", None)
177+
tokenizer_2 = kwargs.pop("tokenizer_2", None)
176178

177179
torch_dtype = kwargs.pop("torch_dtype", None)
178180

@@ -274,8 +276,10 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
274276
load_safety_checker=load_safety_checker,
275277
prediction_type=prediction_type,
276278
text_encoder=text_encoder,
279+
text_encoder_2=text_encoder_2,
277280
vae=vae,
278281
tokenizer=tokenizer,
282+
tokenizer_2=tokenizer_2,
279283
original_config_file=original_config_file,
280284
config_files=config_files,
281285
local_files_only=local_files_only,

src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py

Lines changed: 59 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -1153,7 +1153,9 @@ def download_from_original_stable_diffusion_ckpt(
11531153
vae_path=None,
11541154
vae=None,
11551155
text_encoder=None,
1156+
text_encoder_2=None,
11561157
tokenizer=None,
1158+
tokenizer_2=None,
11571159
config_files=None,
11581160
) -> DiffusionPipeline:
11591161
"""
@@ -1232,7 +1234,9 @@ def download_from_original_stable_diffusion_ckpt(
12321234
StableDiffusionInpaintPipeline,
12331235
StableDiffusionPipeline,
12341236
StableDiffusionUpscalePipeline,
1237+
StableDiffusionXLControlNetInpaintPipeline,
12351238
StableDiffusionXLImg2ImgPipeline,
1239+
StableDiffusionXLInpaintPipeline,
12361240
StableDiffusionXLPipeline,
12371241
StableUnCLIPImg2ImgPipeline,
12381242
StableUnCLIPPipeline,
@@ -1339,7 +1343,11 @@ def download_from_original_stable_diffusion_ckpt(
13391343
else:
13401344
pipeline_class = StableDiffusionXLPipeline if model_type == "SDXL" else StableDiffusionXLImg2ImgPipeline
13411345

1342-
if num_in_channels is None and pipeline_class == StableDiffusionInpaintPipeline:
1346+
if num_in_channels is None and pipeline_class in [
1347+
StableDiffusionInpaintPipeline,
1348+
StableDiffusionXLInpaintPipeline,
1349+
StableDiffusionXLControlNetInpaintPipeline,
1350+
]:
13431351
num_in_channels = 9
13441352
if num_in_channels is None and pipeline_class == StableDiffusionUpscalePipeline:
13451353
num_in_channels = 7
@@ -1686,7 +1694,9 @@ def download_from_original_stable_diffusion_ckpt(
16861694
feature_extractor=feature_extractor,
16871695
)
16881696
elif model_type in ["SDXL", "SDXL-Refiner"]:
1689-
if model_type == "SDXL":
1697+
is_refiner = model_type == "SDXL-Refiner"
1698+
1699+
if (is_refiner is False) and (tokenizer is None):
16901700
try:
16911701
tokenizer = CLIPTokenizer.from_pretrained(
16921702
"openai/clip-vit-large-patch14", local_files_only=local_files_only
@@ -1695,7 +1705,11 @@ def download_from_original_stable_diffusion_ckpt(
16951705
raise ValueError(
16961706
f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'openai/clip-vit-large-patch14'."
16971707
)
1708+
1709+
if (is_refiner is False) and (text_encoder is None):
16981710
text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
1711+
1712+
if tokenizer_2 is None:
16991713
try:
17001714
tokenizer_2 = CLIPTokenizer.from_pretrained(
17011715
"laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
@@ -1705,95 +1719,69 @@ def download_from_original_stable_diffusion_ckpt(
17051719
f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k' with `pad_token` set to '!'."
17061720
)
17071721

1722+
if text_encoder_2 is None:
17081723
config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
17091724
config_kwargs = {"projection_dim": 1280}
1710-
text_encoder_2 = convert_open_clip_checkpoint(
1711-
checkpoint,
1712-
config_name,
1713-
prefix="conditioner.embedders.1.model.",
1714-
has_projection=True,
1715-
local_files_only=local_files_only,
1716-
**config_kwargs,
1717-
)
1718-
1719-
if is_accelerate_available(): # SBM Now move model to cpu.
1720-
if model_type in ["SDXL", "SDXL-Refiner"]:
1721-
for param_name, param in converted_unet_checkpoint.items():
1722-
set_module_tensor_to_device(unet, param_name, "cpu", value=param)
1725+
prefix = "conditioner.embedders.0.model." if is_refiner else "conditioner.embedders.1.model."
17231726

1724-
if controlnet:
1725-
pipe = pipeline_class(
1726-
vae=vae,
1727-
text_encoder=text_encoder,
1728-
tokenizer=tokenizer,
1729-
text_encoder_2=text_encoder_2,
1730-
tokenizer_2=tokenizer_2,
1731-
unet=unet,
1732-
controlnet=controlnet,
1733-
scheduler=scheduler,
1734-
force_zeros_for_empty_prompt=True,
1735-
)
1736-
elif adapter:
1737-
pipe = pipeline_class(
1738-
vae=vae,
1739-
text_encoder=text_encoder,
1740-
tokenizer=tokenizer,
1741-
text_encoder_2=text_encoder_2,
1742-
tokenizer_2=tokenizer_2,
1743-
unet=unet,
1744-
adapter=adapter,
1745-
scheduler=scheduler,
1746-
force_zeros_for_empty_prompt=True,
1747-
)
1748-
else:
1749-
pipe = pipeline_class(
1750-
vae=vae,
1751-
text_encoder=text_encoder,
1752-
tokenizer=tokenizer,
1753-
text_encoder_2=text_encoder_2,
1754-
tokenizer_2=tokenizer_2,
1755-
unet=unet,
1756-
scheduler=scheduler,
1757-
force_zeros_for_empty_prompt=True,
1758-
)
1759-
else:
1760-
tokenizer = None
1761-
text_encoder = None
1762-
try:
1763-
tokenizer_2 = CLIPTokenizer.from_pretrained(
1764-
"laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
1765-
)
1766-
except Exception:
1767-
raise ValueError(
1768-
f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k' with `pad_token` set to '!'."
1769-
)
1770-
config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
1771-
config_kwargs = {"projection_dim": 1280}
17721727
text_encoder_2 = convert_open_clip_checkpoint(
17731728
checkpoint,
17741729
config_name,
1775-
prefix="conditioner.embedders.0.model.",
1730+
prefix=prefix,
17761731
has_projection=True,
17771732
local_files_only=local_files_only,
17781733
**config_kwargs,
17791734
)
17801735

1781-
if is_accelerate_available(): # SBM Now move model to cpu.
1782-
if model_type in ["SDXL", "SDXL-Refiner"]:
1783-
for param_name, param in converted_unet_checkpoint.items():
1784-
set_module_tensor_to_device(unet, param_name, "cpu", value=param)
1736+
if is_accelerate_available(): # SBM Now move model to cpu.
1737+
for param_name, param in converted_unet_checkpoint.items():
1738+
set_module_tensor_to_device(unet, param_name, "cpu", value=param)
17851739

1786-
pipe = StableDiffusionXLImg2ImgPipeline(
1740+
if controlnet:
1741+
pipe = pipeline_class(
17871742
vae=vae,
17881743
text_encoder=text_encoder,
17891744
tokenizer=tokenizer,
17901745
text_encoder_2=text_encoder_2,
17911746
tokenizer_2=tokenizer_2,
17921747
unet=unet,
1748+
controlnet=controlnet,
1749+
scheduler=scheduler,
1750+
force_zeros_for_empty_prompt=True,
1751+
)
1752+
elif adapter:
1753+
pipe = pipeline_class(
1754+
vae=vae,
1755+
text_encoder=text_encoder,
1756+
tokenizer=tokenizer,
1757+
text_encoder_2=text_encoder_2,
1758+
tokenizer_2=tokenizer_2,
1759+
unet=unet,
1760+
adapter=adapter,
17931761
scheduler=scheduler,
1794-
requires_aesthetics_score=True,
1795-
force_zeros_for_empty_prompt=False,
1762+
force_zeros_for_empty_prompt=True,
17961763
)
1764+
1765+
else:
1766+
pipeline_kwargs = {
1767+
"vae": vae,
1768+
"text_encoder": text_encoder,
1769+
"tokenizer": tokenizer,
1770+
"text_encoder_2": text_encoder_2,
1771+
"tokenizer_2": tokenizer_2,
1772+
"unet": unet,
1773+
"scheduler": scheduler,
1774+
}
1775+
1776+
if (pipeline_class == StableDiffusionXLImg2ImgPipeline) or (
1777+
pipeline_class == StableDiffusionXLInpaintPipeline
1778+
):
1779+
pipeline_kwargs.update({"requires_aesthetics_score": is_refiner})
1780+
1781+
if is_refiner:
1782+
pipeline_kwargs.update({"force_zeros_for_empty_prompt": False})
1783+
1784+
pipe = pipeline_class(**pipeline_kwargs)
17971785
else:
17981786
text_config = create_ldm_bert_config(original_config)
17991787
text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)

0 commit comments

Comments
 (0)