Switch mochi and wan models to use pytorch RMSNorm. (#7925)

* Switch genmo model to native RMSNorm.

* Switch WAN to native RMSNorm.
comfyanonymous
2025-05-03 16:07:55 -07:00
committed by GitHub
parent 7689917113
commit 3041e5c354
3 changed files with 7 additions and 20 deletions
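
For context (not part of the diff): RMS normalization rescales each vector by the reciprocal of its root-mean-square and applies a learned per-channel weight, with no mean subtraction and no bias term. Recent PyTorch (2.4+) ships this as torch.nn.RMSNorm, which is what "native RMSNorm" refers to here. The sketch below is illustrative only; rms_norm_reference is a hypothetical helper, not a ComfyUI function.

import torch

def rms_norm_reference(x, weight, eps=1e-5):
    # x: (..., hidden_size), weight: (hidden_size,)
    rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)
    return x * rms * weight

hidden_size = 128
x = torch.randn(2, 16, hidden_size)
native = torch.nn.RMSNorm(hidden_size, eps=1e-5)  # weight is initialized to ones
assert torch.allclose(native(x), rms_norm_reference(x, native.weight), atol=1e-6)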

@@ -13,7 +13,6 @@ from comfy.ldm.modules.attention import optimized_attention
 from .layers import (
     FeedForward,
     PatchEmbed,
-    RMSNorm,
     TimestepEmbedder,
 )
@@ -90,10 +89,10 @@ class AsymmetricAttention(nn.Module):
         # Query and key normalization for stability.
         assert qk_norm
-        self.q_norm_x = RMSNorm(self.head_dim, device=device, dtype=dtype)
-        self.k_norm_x = RMSNorm(self.head_dim, device=device, dtype=dtype)
-        self.q_norm_y = RMSNorm(self.head_dim, device=device, dtype=dtype)
-        self.k_norm_y = RMSNorm(self.head_dim, device=device, dtype=dtype)
+        self.q_norm_x = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype)
+        self.k_norm_x = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype)
+        self.q_norm_y = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype)
+        self.k_norm_y = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype)
         # Output layers. y features go back down from dim_x -> dim_y.
         self.proj_x = operations.Linear(dim_x, dim_x, bias=out_bias, device=device, dtype=dtype)
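
The four modules above normalize queries and keys per attention head (over head_dim) before attention scores are computed, which is what the asserted qk_norm flag guards. A rough illustration of that pattern, using torch.nn.RMSNorm as a stand-in for operations.RMSNorm (whose wrapper-specific behavior, e.g. dtype handling, is not shown in this diff); shapes and names below are assumptions:

import torch

batch, seq, num_heads, head_dim = 1, 8, 4, 64
q = torch.randn(batch, seq, num_heads, head_dim)
k = torch.randn(batch, seq, num_heads, head_dim)

q_norm = torch.nn.RMSNorm(head_dim, eps=1e-5)
k_norm = torch.nn.RMSNorm(head_dim, eps=1e-5)

# Normalization acts on the last (head_dim) axis, so each head's query/key
# vector is rescaled to roughly unit RMS, stabilizing the attention logits.
q, k = q_norm(q), k_norm(k)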

@@ -151,14 +151,3 @@ class PatchEmbed(nn.Module):
         x = self.norm(x)
         return x
-
-
-class RMSNorm(torch.nn.Module):
-    def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None):
-        super().__init__()
-        self.eps = eps
-        self.weight = torch.nn.Parameter(torch.empty(hidden_size, device=device, dtype=dtype))
-        self.register_parameter("bias", None)
-
-    def forward(self, x):
-        return comfy.ldm.common_dit.rms_norm(x, self.weight, self.eps)
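
The removed class kept only a weight parameter (bias explicitly registered as None) and defaulted eps to 1e-5, which is why the new call sites pass eps=1e-5 explicitly. Assuming comfy.ldm.common_dit.rms_norm implements standard weighted RMS normalization, a torch-native RMSNorm with the same "weight" state_dict key is a drop-in replacement. A sanity-check sketch under that assumption:

import torch

hidden_size = 64
old_weight = torch.randn(hidden_size)  # stands in for the removed module's self.weight

new_norm = torch.nn.RMSNorm(hidden_size, eps=1e-5)
with torch.no_grad():
    new_norm.weight.copy_(old_weight)  # only parameter is "weight", matching the old module

x = torch.randn(3, hidden_size)
expected = x * torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + 1e-5) * old_weight
assert torch.allclose(new_norm(x), expected, atol=1e-5)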