diff --git a/docs/source/en/api/pipelines/z_image.md b/docs/source/en/api/pipelines/z_image.md index cf4c1aefb81f..85d926406368 100644 --- a/docs/source/en/api/pipelines/z_image.md +++ b/docs/source/en/api/pipelines/z_image.md @@ -46,7 +46,7 @@ image = pipe( prompt, image=init_image, strength=0.6, - num_inference_steps=9, + num_inference_steps=8, guidance_scale=0.0, generator=torch.Generator("cuda").manual_seed(42), ).images[0] @@ -81,7 +81,7 @@ image = pipe( image=init_image, mask_image=mask_image, strength=1.0, - num_inference_steps=9, + num_inference_steps=8, guidance_scale=0.0, generator=torch.Generator("cuda").manual_seed(42), ).images[0] diff --git a/src/diffusers/modular_pipelines/z_image/before_denoise.py b/src/diffusers/modular_pipelines/z_image/before_denoise.py index 8558f2c67f65..9b6c1976a1bf 100644 --- a/src/diffusers/modular_pipelines/z_image/before_denoise.py +++ b/src/diffusers/modular_pipelines/z_image/before_denoise.py @@ -185,6 +185,11 @@ def retrieve_timesteps( return timesteps, num_inference_steps +# Copied from diffusers.pipelines.z_image.pipeline_z_image.get_default_z_image_sigmas +def get_default_z_image_sigmas(num_inference_steps: int) -> list[float]: + return torch.linspace(1.0, 1 / num_inference_steps, num_inference_steps).tolist() + + class ZImageTextInputStep(ModularPipelineBlocks): model_name = "z-image" @@ -508,7 +513,7 @@ def description(self) -> str: def inputs(self) -> list[InputParam]: return [ InputParam("latents", required=True), - InputParam("num_inference_steps", default=9), + InputParam("num_inference_steps", default=8), InputParam("sigmas"), ] @@ -535,13 +540,15 @@ def __call__(self, components: ZImageModularPipeline, state: PipelineState) -> P base_shift=components.scheduler.config.get("base_shift", 0.5), max_shift=components.scheduler.config.get("max_shift", 1.15), ) - components.scheduler.sigma_min = 0.0 + sigmas = block_state.sigmas + if sigmas is None: + sigmas = get_default_z_image_sigmas(block_state.num_inference_steps) block_state.timesteps, block_state.num_inference_steps = retrieve_timesteps( components.scheduler, block_state.num_inference_steps, device, - sigmas=block_state.sigmas, + sigmas=sigmas, mu=mu, ) diff --git a/src/diffusers/modular_pipelines/z_image/modular_blocks_z_image.py b/src/diffusers/modular_pipelines/z_image/modular_blocks_z_image.py index 23e20d55fb1e..1440128a3c55 100644 --- a/src/diffusers/modular_pipelines/z_image/modular_blocks_z_image.py +++ b/src/diffusers/modular_pipelines/z_image/modular_blocks_z_image.py @@ -66,7 +66,7 @@ class ZImageCoreDenoiseStep(SequentialPipelineBlocks): TODO: Add description. generator (`None`, *optional*): TODO: Add description. - num_inference_steps (`None`, *optional*, defaults to 9): + num_inference_steps (`None`, *optional*, defaults to 8): TODO: Add description. sigmas (`None`, *optional*): TODO: Add description. @@ -122,7 +122,7 @@ class ZImageImage2ImageCoreDenoiseStep(SequentialPipelineBlocks): TODO: Add description. generator (`None`, *optional*): TODO: Add description. - num_inference_steps (`None`, *optional*, defaults to 9): + num_inference_steps (`None`, *optional*, defaults to 8): TODO: Add description. sigmas (`None`, *optional*): TODO: Add description. diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image.py b/src/diffusers/pipelines/z_image/pipeline_z_image.py index 46403a0719cd..3e2055c6257f 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image.py @@ -51,7 +51,7 @@ ... prompt, ... height=1024, ... width=1024, - ... num_inference_steps=9, + ... num_inference_steps=8, ... guidance_scale=0.0, ... generator=torch.Generator("cuda").manual_seed(42), ... ).images[0] @@ -134,6 +134,10 @@ def retrieve_timesteps( return timesteps, num_inference_steps +def get_default_z_image_sigmas(num_inference_steps: int) -> list[float]: + return torch.linspace(1.0, 1 / num_inference_steps, num_inference_steps).tolist() + + class ZImagePipeline(DiffusionPipeline, ZImageLoraLoaderMixin, FromSingleFileMixin): model_cpu_offload_seq = "text_encoder->transformer->vae" _optional_components = [] @@ -474,7 +478,8 @@ def __call__( self.scheduler.config.get("base_shift", 0.5), self.scheduler.config.get("max_shift", 1.15), ) - self.scheduler.sigma_min = 0.0 + if sigmas is None: + sigmas = get_default_z_image_sigmas(num_inference_steps) scheduler_kwargs = {"mu": mu} timesteps, num_inference_steps = retrieve_timesteps( self.scheduler, diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py index d64999138af7..81373ffb56ff 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py @@ -88,7 +88,7 @@ ... controlnet_conditioning_scale=0.75, ... height=1728, ... width=992, - ... num_inference_steps=9, + ... num_inference_steps=8, ... guidance_scale=0.0, ... generator=torch.Generator("cuda").manual_seed(43), ... ).images[0] @@ -185,6 +185,11 @@ def retrieve_timesteps( return timesteps, num_inference_steps +# Copied from diffusers.pipelines.z_image.pipeline_z_image.get_default_z_image_sigmas +def get_default_z_image_sigmas(num_inference_steps: int) -> list[float]: + return torch.linspace(1.0, 1 / num_inference_steps, num_inference_steps).tolist() + + class ZImageControlNetPipeline(DiffusionPipeline, ZImageLoraLoaderMixin, FromSingleFileMixin): model_cpu_offload_seq = "text_encoder->transformer->vae" _optional_components = [] @@ -601,7 +606,8 @@ def __call__( self.scheduler.config.get("base_shift", 0.5), self.scheduler.config.get("max_shift", 1.15), ) - self.scheduler.sigma_min = 0.0 + if sigmas is None: + sigmas = get_default_z_image_sigmas(num_inference_steps) scheduler_kwargs = {"mu": mu} timesteps, num_inference_steps = retrieve_timesteps( self.scheduler, diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py index 40f368f0d070..178e74dea4fa 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py @@ -185,6 +185,11 @@ def retrieve_timesteps( return timesteps, num_inference_steps +# Copied from diffusers.pipelines.z_image.pipeline_z_image.get_default_z_image_sigmas +def get_default_z_image_sigmas(num_inference_steps: int) -> list[float]: + return torch.linspace(1.0, 1 / num_inference_steps, num_inference_steps).tolist() + + class ZImageControlNetInpaintPipeline(DiffusionPipeline, ZImageLoraLoaderMixin, FromSingleFileMixin): model_cpu_offload_seq = "text_encoder->transformer->vae" _optional_components = [] @@ -628,7 +633,8 @@ def __call__( self.scheduler.config.get("base_shift", 0.5), self.scheduler.config.get("max_shift", 1.15), ) - self.scheduler.sigma_min = 0.0 + if sigmas is None: + sigmas = get_default_z_image_sigmas(num_inference_steps) scheduler_kwargs = {"mu": mu} timesteps, num_inference_steps = retrieve_timesteps( self.scheduler, diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image_img2img.py b/src/diffusers/pipelines/z_image/pipeline_z_image_img2img.py index ee57f51dd957..b5c7740bb0c1 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image_img2img.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image_img2img.py @@ -49,7 +49,7 @@ ... prompt, ... image=init_image, ... strength=0.6, - ... num_inference_steps=9, + ... num_inference_steps=8, ... guidance_scale=0.0, ... generator=torch.Generator("cuda").manual_seed(42), ... ).images[0] @@ -146,6 +146,11 @@ def retrieve_timesteps( return timesteps, num_inference_steps +# Copied from diffusers.pipelines.z_image.pipeline_z_image.get_default_z_image_sigmas +def get_default_z_image_sigmas(num_inference_steps: int) -> list[float]: + return torch.linspace(1.0, 1 / num_inference_steps, num_inference_steps).tolist() + + class ZImageImg2ImgPipeline(DiffusionPipeline, ZImageLoraLoaderMixin, FromSingleFileMixin): r""" The ZImage pipeline for image-to-image generation. @@ -563,7 +568,8 @@ def __call__( self.scheduler.config.get("base_shift", 0.5), self.scheduler.config.get("max_shift", 1.15), ) - self.scheduler.sigma_min = 0.0 + if sigmas is None: + sigmas = get_default_z_image_sigmas(num_inference_steps) scheduler_kwargs = {"mu": mu} timesteps, num_inference_steps = retrieve_timesteps( self.scheduler, diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image_inpaint.py b/src/diffusers/pipelines/z_image/pipeline_z_image_inpaint.py index e740a48e65ec..132c22c0cff3 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image_inpaint.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image_inpaint.py @@ -65,7 +65,7 @@ ... image=init_image, ... mask_image=mask_image, ... strength=1.0, - ... num_inference_steps=9, + ... num_inference_steps=8, ... guidance_scale=0.0, ... generator=torch.Generator("cuda").manual_seed(42), ... ).images[0] @@ -162,6 +162,11 @@ def retrieve_timesteps( return timesteps, num_inference_steps +# Copied from diffusers.pipelines.z_image.pipeline_z_image.get_default_z_image_sigmas +def get_default_z_image_sigmas(num_inference_steps: int) -> list[float]: + return torch.linspace(1.0, 1 / num_inference_steps, num_inference_steps).tolist() + + class ZImageInpaintPipeline(DiffusionPipeline, ZImageLoraLoaderMixin, FromSingleFileMixin): r""" The ZImage pipeline for inpainting. @@ -750,7 +755,8 @@ def __call__( self.scheduler.config.get("base_shift", 0.5), self.scheduler.config.get("max_shift", 1.15), ) - self.scheduler.sigma_min = 0.0 + if sigmas is None: + sigmas = get_default_z_image_sigmas(num_inference_steps) scheduler_kwargs = {"mu": mu} timesteps, num_inference_steps = retrieve_timesteps( self.scheduler, diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image_omni.py b/src/diffusers/pipelines/z_image/pipeline_z_image_omni.py index 9199e176a1f6..50776ceaf34d 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image_omni.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image_omni.py @@ -52,7 +52,7 @@ ... prompt, ... height=1024, ... width=1024, - ... num_inference_steps=9, + ... num_inference_steps=8, ... guidance_scale=0.0, ... generator=torch.Generator("cuda").manual_seed(42), ... ).images[0] @@ -135,6 +135,11 @@ def retrieve_timesteps( return timesteps, num_inference_steps +# Copied from diffusers.pipelines.z_image.pipeline_z_image.get_default_z_image_sigmas +def get_default_z_image_sigmas(num_inference_steps: int) -> list[float]: + return torch.linspace(1.0, 1 / num_inference_steps, num_inference_steps).tolist() + + class ZImageOmniPipeline(DiffusionPipeline, ZImageLoraLoaderMixin, FromSingleFileMixin): model_cpu_offload_seq = "text_encoder->transformer->vae" _optional_components = [] @@ -604,7 +609,8 @@ def __call__( self.scheduler.config.get("base_shift", 0.5), self.scheduler.config.get("max_shift", 1.15), ) - self.scheduler.sigma_min = 0.0 + if sigmas is None: + sigmas = get_default_z_image_sigmas(num_inference_steps) scheduler_kwargs = {"mu": mu} timesteps, num_inference_steps = retrieve_timesteps( self.scheduler,