Fix default shift and support any latent size for the qwen image model. (#9186)

comfyanonymous 2025-08-05 03:12:27 -07:00 committed by GitHub
parent 5be6fd09ff
commit d044a24398
2 changed files with 6 additions and 5 deletions

--- a/comfy/ldm/qwen_image/model.py
+++ b/comfy/ldm/qwen_image/model.py
@@ -8,7 +8,7 @@ from einops import repeat
 from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps
 from comfy.ldm.modules.attention import optimized_attention_masked
 from comfy.ldm.flux.layers import EmbedND
-
+import comfy.ldm.common_dit
 
 class GELU(nn.Module):
     def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None):
@@ -364,8 +364,9 @@ class QwenImageTransformer2DModel(nn.Module):
         image_rotary_emb = self.pos_embeds(x, context)
 
-        orig_shape = x.shape
-        hidden_states = x.view(orig_shape[0], orig_shape[1], orig_shape[-2] // 2, 2, orig_shape[-1] // 2, 2)
+        hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (1, self.patch_size, self.patch_size))
+        orig_shape = hidden_states.shape
+        hidden_states = hidden_states.view(orig_shape[0], orig_shape[1], orig_shape[-2] // 2, 2, orig_shape[-1] // 2, 2)
         hidden_states = hidden_states.permute(0, 2, 4, 1, 3, 5)
         hidden_states = hidden_states.reshape(orig_shape[0], (orig_shape[-2] // 2) * (orig_shape[-1] // 2), orig_shape[1] * 4)
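This is the input half of the "any latent size" fix: the 2x2 patchify right below requires even spatial dims, so the latent is first padded up to the next multiple of the patch size. A minimal sketch of what comfy.ldm.common_dit.pad_to_patch_size does, simplified to 4D (b, c, h, w) tensors (the call site above passes a 3-tuple for latents carrying an extra frame dim, and the default circular padding mode here is an assumption):

    import torch
    import torch.nn.functional as F

    def pad_to_patch_size(img, patch_size=(2, 2), mode="circular"):
        # Pad height/width up to the next multiple of the patch size so the
        # view(..., h // 2, 2, w // 2, 2) patchify above never truncates.
        ph = (patch_size[-2] - img.shape[-2] % patch_size[-2]) % patch_size[-2]
        pw = (patch_size[-1] - img.shape[-1] % patch_size[-1]) % patch_size[-1]
        if ph == 0 and pw == 0:
            return img
        return F.pad(img, (0, pw, 0, ph), mode=mode)

    x = torch.randn(1, 16, 105, 77)    # odd latent height/width
    print(pad_to_patch_size(x).shape)  # torch.Size([1, 16, 106, 78])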
@@ -396,4 +397,4 @@ class QwenImageTransformer2DModel(nn.Module):
         hidden_states = hidden_states.view(orig_shape[0], orig_shape[-2] // 2, orig_shape[-1] // 2, orig_shape[1], 2, 2)
         hidden_states = hidden_states.permute(0, 3, 1, 4, 2, 5)
-        return hidden_states.reshape(orig_shape)
+        return hidden_states.reshape(orig_shape)[:, :, :, :x.shape[-2], :x.shape[-1]]
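And this is the matching output half: after unpatchify the tensor still has the padded height/width, so it is cropped back to the caller's original x.shape. A toy round trip under the same simplified 4D assumptions (reusing the pad_to_patch_size sketch above), checking that an odd-sized latent comes back at its original size:

    b, c, h, w = 1, 16, 105, 77
    x = torch.randn(b, c, h, w)

    hs = pad_to_patch_size(x, (2, 2))
    ph, pw = hs.shape[-2], hs.shape[-1]

    # Patchify: (b, c, ph, pw) -> (b, (ph//2)*(pw//2), c*4), mirroring the forward pass.
    tokens = (hs.view(b, c, ph // 2, 2, pw // 2, 2)
                .permute(0, 2, 4, 1, 3, 5)
                .reshape(b, (ph // 2) * (pw // 2), c * 4))

    # Unpatchify, then crop the padding off, mirroring the new return statement.
    out = (tokens.view(b, ph // 2, pw // 2, c, 2, 2)
                 .permute(0, 3, 1, 4, 2, 5)
                 .reshape(b, c, ph, pw))[:, :, :h, :w]

    assert out.shape == x.shape and torch.equal(out, x)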

--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -1237,7 +1237,7 @@ class QwenImage(supported_models_base.BASE):
     sampling_settings = {
         "multiplier": 1.0,
-        "shift": 2.6,
+        "shift": 1.15,
     }
     memory_usage_factor = 1.8 #TODO
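The second half of the commit lowers the default flow shift for Qwen image from 2.6 to 1.15. In ComfyUI's discrete-flow sampling, shift warps the uniform timestep grid toward higher noise levels before sigmas are derived from it; a quick numeric check with the usual time-shift formula (matching ComfyUI's time_snr_shift, assuming the standard non-exponential variant) shows how much closer 1.15 stays to the unshifted schedule:

    def time_snr_shift(alpha: float, t: float) -> float:
        # alpha == 1.0 leaves the schedule untouched; larger values push
        # every mid-schedule sigma toward the high-noise end.
        if alpha == 1.0:
            return t
        return alpha * t / (1 + (alpha - 1) * t)

    for t in (0.25, 0.5, 0.75):
        old, new = time_snr_shift(2.6, t), time_snr_shift(1.15, t)
        print(f"t={t:.2f}  shift=2.6 -> {old:.3f}  shift=1.15 -> {new:.3f}")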