Made Mochi work with optimized_attention_override

2025-09-13 04:55:53 +00:00 · 2025-08-28 20:34:06 -07:00
parent ef894cdf08
commit 61b5c5fc75
1 changed file with 8 additions and 3 deletions
--- a/comfy/ldm/genmo/joint_model/asymm_models_joint.py
+++ b/comfy/ldm/genmo/joint_model/asymm_models_joint.py
@@ -109,6 +109,7 @@ class AsymmetricAttention(nn.Module):
        scale_x: torch.Tensor,  # (B, dim_x), modulation for pre-RMSNorm.
        scale_y: torch.Tensor,  # (B, dim_y), modulation for pre-RMSNorm.
        crop_y,
+        transformer_options={},
        **rope_rotation,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        rope_cos = rope_rotation.get("rope_cos")
@@ -143,7 +144,7 @@ class AsymmetricAttention(nn.Module):

        xy = optimized_attention(q,
                                 k,
-                                 v, self.num_heads, skip_reshape=True)
+                                 v, self.num_heads, skip_reshape=True, transformer_options=transformer_options)

        x, y = torch.tensor_split(xy, (q_x.shape[1],), dim=1)
        x = self.proj_x(x)
@@ -224,6 +225,7 @@ class AsymmetricJointBlock(nn.Module):
        x: torch.Tensor,
        c: torch.Tensor,
        y: torch.Tensor,
+        transformer_options={},
        **attn_kwargs,
    ):
        """Forward pass of a block.
@@ -256,6 +258,7 @@ class AsymmetricJointBlock(nn.Module):
            y,
            scale_x=scale_msa_x,
            scale_y=scale_msa_y,
+            transformer_options=transformer_options,
            **attn_kwargs,
        )

@@ -524,10 +527,11 @@ class AsymmDiTJoint(nn.Module):
                                                    args["txt"],
                                                    rope_cos=args["rope_cos"],
                                                    rope_sin=args["rope_sin"],
-                                                    crop_y=args["num_tokens"]
+                                                    crop_y=args["num_tokens"],
+                                                    transformer_options=args["transformer_options"]
                                                    )
                    return out
-                out = blocks_replace[("double_block", i)]({"img": x, "txt": y_feat, "vec": c, "rope_cos": rope_cos, "rope_sin": rope_sin, "num_tokens": num_tokens}, {"original_block": block_wrap})
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": y_feat, "vec": c, "rope_cos": rope_cos, "rope_sin": rope_sin, "num_tokens": num_tokens, "transformer_options": transformer_options}, {"original_block": block_wrap})
                y_feat = out["txt"]
                x = out["img"]
            else:
@@ -538,6 +542,7 @@ class AsymmDiTJoint(nn.Module):
                    rope_cos=rope_cos,
                    rope_sin=rope_sin,
                    crop_y=num_tokens,
+                    transformer_options=transformer_options,
                )  # (B, M, D), (B, L, D)
        del y_feat  # Final layers don't use dense text features.