Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-09-11 03:58:22 +00:00)
Fixes to make controlnet type models work on qwen edit and kontext. (#9581)
This commit is contained in:
@@ -158,7 +158,7 @@ class Flux(nn.Module):
|
|||||||
if i < len(control_i):
|
if i < len(control_i):
|
||||||
add = control_i[i]
|
add = control_i[i]
|
||||||
if add is not None:
|
if add is not None:
|
||||||
img += add
|
img[:, :add.shape[1]] += add
|
||||||
|
|
||||||
if img.dtype == torch.float16:
|
if img.dtype == torch.float16:
|
||||||
img = torch.nan_to_num(img, nan=0.0, posinf=65504, neginf=-65504)
|
img = torch.nan_to_num(img, nan=0.0, posinf=65504, neginf=-65504)
|
||||||
@@ -189,7 +189,7 @@ class Flux(nn.Module):
|
|||||||
if i < len(control_o):
|
if i < len(control_o):
|
||||||
add = control_o[i]
|
add = control_o[i]
|
||||||
if add is not None:
|
if add is not None:
|
||||||
img[:, txt.shape[1] :, ...] += add
|
img[:, txt.shape[1] : txt.shape[1] + add.shape[1], ...] += add
|
||||||
|
|
||||||
img = img[:, txt.shape[1] :, ...]
|
img = img[:, txt.shape[1] :, ...]
|
||||||
|
|
||||||
|
@@ -459,7 +459,7 @@ class QwenImageTransformer2DModel(nn.Module):
|
|||||||
if i < len(control_i):
|
if i < len(control_i):
|
||||||
add = control_i[i]
|
add = control_i[i]
|
||||||
if add is not None:
|
if add is not None:
|
||||||
hidden_states += add
|
hidden_states[:, :add.shape[1]] += add
|
||||||
|
|
||||||
hidden_states = self.norm_out(hidden_states, temb)
|
hidden_states = self.norm_out(hidden_states, temb)
|
||||||
hidden_states = self.proj_out(hidden_states)
|
hidden_states = self.proj_out(hidden_states)
|
||||||
|
@@ -89,6 +89,7 @@ class DiffSynthCnetPatch:
|
|||||||
self.strength = strength
|
self.strength = strength
|
||||||
self.mask = mask
|
self.mask = mask
|
||||||
self.encoded_image = model_patch.model.process_input_latent_image(self.encode_latent_cond(image))
|
self.encoded_image = model_patch.model.process_input_latent_image(self.encode_latent_cond(image))
|
||||||
|
self.encoded_image_size = (image.shape[1], image.shape[2])
|
||||||
|
|
||||||
def encode_latent_cond(self, image):
|
def encode_latent_cond(self, image):
|
||||||
latent_image = self.vae.encode(image)
|
latent_image = self.vae.encode(image)
|
||||||
@@ -106,14 +107,15 @@ class DiffSynthCnetPatch:
|
|||||||
x = kwargs.get("x")
|
x = kwargs.get("x")
|
||||||
img = kwargs.get("img")
|
img = kwargs.get("img")
|
||||||
block_index = kwargs.get("block_index")
|
block_index = kwargs.get("block_index")
|
||||||
if self.encoded_image is None or self.encoded_image.shape[1:] != img.shape[1:]:
|
spacial_compression = self.vae.spacial_compression_encode()
|
||||||
spacial_compression = self.vae.spacial_compression_encode()
|
if self.encoded_image is None or self.encoded_image_size != (x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression):
|
||||||
image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center")
|
image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center")
|
||||||
loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
|
loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
|
||||||
self.encoded_image = self.model_patch.model.process_input_latent_image(self.encode_latent_cond(image_scaled.movedim(1, -1)))
|
self.encoded_image = self.model_patch.model.process_input_latent_image(self.encode_latent_cond(image_scaled.movedim(1, -1)))
|
||||||
|
self.encoded_image_size = (image_scaled.shape[-2], image_scaled.shape[-1])
|
||||||
comfy.model_management.load_models_gpu(loaded_models)
|
comfy.model_management.load_models_gpu(loaded_models)
|
||||||
|
|
||||||
img = img + (self.model_patch.model.control_block(img, self.encoded_image.to(img.dtype), block_index) * self.strength)
|
img[:, :self.encoded_image.shape[1]] += (self.model_patch.model.control_block(img[:, :self.encoded_image.shape[1]], self.encoded_image.to(img.dtype), block_index) * self.strength)
|
||||||
kwargs['img'] = img
|
kwargs['img'] = img
|
||||||
return kwargs
|
return kwargs
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user