@@ -1153,7 +1153,9 @@ def download_from_original_stable_diffusion_ckpt(
11531153 vae_path = None ,
11541154 vae = None ,
11551155 text_encoder = None ,
1156+ text_encoder_2 = None ,
11561157 tokenizer = None ,
1158+ tokenizer_2 = None ,
11571159 config_files = None ,
11581160) -> DiffusionPipeline :
11591161 """
@@ -1232,7 +1234,9 @@ def download_from_original_stable_diffusion_ckpt(
12321234 StableDiffusionInpaintPipeline ,
12331235 StableDiffusionPipeline ,
12341236 StableDiffusionUpscalePipeline ,
1237+ StableDiffusionXLControlNetInpaintPipeline ,
12351238 StableDiffusionXLImg2ImgPipeline ,
1239+ StableDiffusionXLInpaintPipeline ,
12361240 StableDiffusionXLPipeline ,
12371241 StableUnCLIPImg2ImgPipeline ,
12381242 StableUnCLIPPipeline ,
@@ -1339,7 +1343,11 @@ def download_from_original_stable_diffusion_ckpt(
13391343 else :
13401344 pipeline_class = StableDiffusionXLPipeline if model_type == "SDXL" else StableDiffusionXLImg2ImgPipeline
13411345
1342- if num_in_channels is None and pipeline_class == StableDiffusionInpaintPipeline :
1346+ if num_in_channels is None and pipeline_class in [
1347+ StableDiffusionInpaintPipeline ,
1348+ StableDiffusionXLInpaintPipeline ,
1349+ StableDiffusionXLControlNetInpaintPipeline ,
1350+ ]:
13431351 num_in_channels = 9
13441352 if num_in_channels is None and pipeline_class == StableDiffusionUpscalePipeline :
13451353 num_in_channels = 7
@@ -1686,7 +1694,9 @@ def download_from_original_stable_diffusion_ckpt(
16861694 feature_extractor = feature_extractor ,
16871695 )
16881696 elif model_type in ["SDXL" , "SDXL-Refiner" ]:
1689- if model_type == "SDXL" :
1697+ is_refiner = model_type == "SDXL-Refiner"
1698+
1699+ if (is_refiner is False ) and (tokenizer is None ):
16901700 try :
16911701 tokenizer = CLIPTokenizer .from_pretrained (
16921702 "openai/clip-vit-large-patch14" , local_files_only = local_files_only
@@ -1695,7 +1705,11 @@ def download_from_original_stable_diffusion_ckpt(
16951705 raise ValueError (
16961706 f"With local_files_only set to { local_files_only } , you must first locally save the tokenizer in the following path: 'openai/clip-vit-large-patch14'."
16971707 )
1708+
1709+ if (is_refiner is False ) and (text_encoder is None ):
16981710 text_encoder = convert_ldm_clip_checkpoint (checkpoint , local_files_only = local_files_only )
1711+
1712+ if tokenizer_2 is None :
16991713 try :
17001714 tokenizer_2 = CLIPTokenizer .from_pretrained (
17011715 "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" , pad_token = "!" , local_files_only = local_files_only
@@ -1705,95 +1719,69 @@ def download_from_original_stable_diffusion_ckpt(
17051719 f"With local_files_only set to { local_files_only } , you must first locally save the tokenizer in the following path: 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k' with `pad_token` set to '!'."
17061720 )
17071721
1722+ if text_encoder_2 is None :
17081723 config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
17091724 config_kwargs = {"projection_dim" : 1280 }
1710- text_encoder_2 = convert_open_clip_checkpoint (
1711- checkpoint ,
1712- config_name ,
1713- prefix = "conditioner.embedders.1.model." ,
1714- has_projection = True ,
1715- local_files_only = local_files_only ,
1716- ** config_kwargs ,
1717- )
1718-
1719- if is_accelerate_available (): # SBM Now move model to cpu.
1720- if model_type in ["SDXL" , "SDXL-Refiner" ]:
1721- for param_name , param in converted_unet_checkpoint .items ():
1722- set_module_tensor_to_device (unet , param_name , "cpu" , value = param )
1725+ prefix = "conditioner.embedders.0.model." if is_refiner else "conditioner.embedders.1.model."
17231726
1724- if controlnet :
1725- pipe = pipeline_class (
1726- vae = vae ,
1727- text_encoder = text_encoder ,
1728- tokenizer = tokenizer ,
1729- text_encoder_2 = text_encoder_2 ,
1730- tokenizer_2 = tokenizer_2 ,
1731- unet = unet ,
1732- controlnet = controlnet ,
1733- scheduler = scheduler ,
1734- force_zeros_for_empty_prompt = True ,
1735- )
1736- elif adapter :
1737- pipe = pipeline_class (
1738- vae = vae ,
1739- text_encoder = text_encoder ,
1740- tokenizer = tokenizer ,
1741- text_encoder_2 = text_encoder_2 ,
1742- tokenizer_2 = tokenizer_2 ,
1743- unet = unet ,
1744- adapter = adapter ,
1745- scheduler = scheduler ,
1746- force_zeros_for_empty_prompt = True ,
1747- )
1748- else :
1749- pipe = pipeline_class (
1750- vae = vae ,
1751- text_encoder = text_encoder ,
1752- tokenizer = tokenizer ,
1753- text_encoder_2 = text_encoder_2 ,
1754- tokenizer_2 = tokenizer_2 ,
1755- unet = unet ,
1756- scheduler = scheduler ,
1757- force_zeros_for_empty_prompt = True ,
1758- )
1759- else :
1760- tokenizer = None
1761- text_encoder = None
1762- try :
1763- tokenizer_2 = CLIPTokenizer .from_pretrained (
1764- "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" , pad_token = "!" , local_files_only = local_files_only
1765- )
1766- except Exception :
1767- raise ValueError (
1768- f"With local_files_only set to { local_files_only } , you must first locally save the tokenizer in the following path: 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k' with `pad_token` set to '!'."
1769- )
1770- config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
1771- config_kwargs = {"projection_dim" : 1280 }
17721727 text_encoder_2 = convert_open_clip_checkpoint (
17731728 checkpoint ,
17741729 config_name ,
1775- prefix = "conditioner.embedders.0.model." ,
1730+ prefix = prefix ,
17761731 has_projection = True ,
17771732 local_files_only = local_files_only ,
17781733 ** config_kwargs ,
17791734 )
17801735
1781- if is_accelerate_available (): # SBM Now move model to cpu.
1782- if model_type in ["SDXL" , "SDXL-Refiner" ]:
1783- for param_name , param in converted_unet_checkpoint .items ():
1784- set_module_tensor_to_device (unet , param_name , "cpu" , value = param )
1736+ if is_accelerate_available (): # SBM Now move model to cpu.
1737+ for param_name , param in converted_unet_checkpoint .items ():
1738+ set_module_tensor_to_device (unet , param_name , "cpu" , value = param )
17851739
1786- pipe = StableDiffusionXLImg2ImgPipeline (
1740+ if controlnet :
1741+ pipe = pipeline_class (
17871742 vae = vae ,
17881743 text_encoder = text_encoder ,
17891744 tokenizer = tokenizer ,
17901745 text_encoder_2 = text_encoder_2 ,
17911746 tokenizer_2 = tokenizer_2 ,
17921747 unet = unet ,
1748+ controlnet = controlnet ,
1749+ scheduler = scheduler ,
1750+ force_zeros_for_empty_prompt = True ,
1751+ )
1752+ elif adapter :
1753+ pipe = pipeline_class (
1754+ vae = vae ,
1755+ text_encoder = text_encoder ,
1756+ tokenizer = tokenizer ,
1757+ text_encoder_2 = text_encoder_2 ,
1758+ tokenizer_2 = tokenizer_2 ,
1759+ unet = unet ,
1760+ adapter = adapter ,
17931761 scheduler = scheduler ,
1794- requires_aesthetics_score = True ,
1795- force_zeros_for_empty_prompt = False ,
1762+ force_zeros_for_empty_prompt = True ,
17961763 )
1764+
1765+ else :
1766+ pipeline_kwargs = {
1767+ "vae" : vae ,
1768+ "text_encoder" : text_encoder ,
1769+ "tokenizer" : tokenizer ,
1770+ "text_encoder_2" : text_encoder_2 ,
1771+ "tokenizer_2" : tokenizer_2 ,
1772+ "unet" : unet ,
1773+ "scheduler" : scheduler ,
1774+ }
1775+
1776+ if (pipeline_class == StableDiffusionXLImg2ImgPipeline ) or (
1777+ pipeline_class == StableDiffusionXLInpaintPipeline
1778+ ):
1779+ pipeline_kwargs .update ({"requires_aesthetics_score" : is_refiner })
1780+
1781+ if is_refiner :
1782+ pipeline_kwargs .update ({"force_zeros_for_empty_prompt" : False })
1783+
1784+ pipe = pipeline_class (** pipeline_kwargs )
17971785 else :
17981786 text_config = create_ldm_bert_config (original_config )
17991787 text_model = convert_ldm_bert_checkpoint (checkpoint , text_config )
0 commit comments