Merge branch 'master' into attention-select

2025-09-12 12:37:01 +00:00 · 2025-08-29 23:35:38 -07:00
parent cb959f9669 885015eecf
commit d9bb4530b0
15 changed files with 253 additions and 105 deletions
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -1257,6 +1257,7 @@ class WanModel_S2V(WanModel):
            audio_emb = None

        # embeddings
+        bs, _, time, height, width = x.shape
        x = self.patch_embedding(x.float()).to(x.dtype)
        if control_video is not None:
            x = x + self.cond_encoder(control_video)
@@ -1274,11 +1275,12 @@ class WanModel_S2V(WanModel):
        if reference_latent is not None:
            ref = self.patch_embedding(reference_latent.float()).to(x.dtype)
            ref = ref.flatten(2).transpose(1, 2)
-            freqs_ref = self.rope_encode(reference_latent.shape[-3], reference_latent.shape[-2], reference_latent.shape[-1], t_start=30, device=x.device, dtype=x.dtype)
+            freqs_ref = self.rope_encode(reference_latent.shape[-3], reference_latent.shape[-2], reference_latent.shape[-1], t_start=max(30, time + 9), device=x.device, dtype=x.dtype)
            ref = ref + cond_mask_weight[1]
            x = torch.cat([x, ref], dim=1)
            freqs = torch.cat([freqs, freqs_ref], dim=1)
            t = torch.cat([t, torch.zeros((t.shape[0], reference_latent.shape[-3]), device=t.device, dtype=t.dtype)], dim=1)
+            del ref, freqs_ref

        if reference_motion is not None:
            motion_encoded, freqs_motion = self.frame_packer(reference_motion, self)
@@ -1288,6 +1290,7 @@ class WanModel_S2V(WanModel):

            t = torch.repeat_interleave(t, 2, dim=1)
            t = torch.cat([t, torch.zeros((t.shape[0], 3), device=t.device, dtype=t.dtype)], dim=1)
+            del motion_encoded, freqs_motion

        # time embeddings
        e = self.time_embedding(
@@ -1298,7 +1301,6 @@ class WanModel_S2V(WanModel):
        # context
        context = self.text_embedding(context)

-
        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
        for i, block in enumerate(self.blocks):