LoRA Trainer: LoRA training node in weight adapter scheme (#8446)

Author: Kohaku-Blueleaf
Date: 2025-06-14 07:25:59 +08:00
Committed by: GitHub
parent 5bf69bde35
commit 520eb77b72
12 changed files with 949 additions and 24 deletions

View File

@@ -1,4 +1,4 @@
from .base import WeightAdapterBase
from .base import WeightAdapterBase, WeightAdapterTrainBase
from .lora import LoRAAdapter
from .loha import LoHaAdapter
from .lokr import LoKrAdapter
@@ -15,3 +15,9 @@ adapters: list[type[WeightAdapterBase]] = [
OFTAdapter,
BOFTAdapter,
]
__all__ = [
"WeightAdapterBase",
"WeightAdapterTrainBase",
"adapters"
] + [a.__name__ for a in adapters]
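
As a rough illustration of how the exported registry might be consumed downstream (a sketch, not part of this diff; the helper name detect_adapter and the assumption that a concrete adapter's load() returns None for keys it does not recognize are mine):

# Hypothetical helper, assuming the package is importable as comfy.weight_adapter.
import torch
from comfy.weight_adapter import adapters, WeightAdapterBase

def detect_adapter(
    key: str,
    lora_sd: dict[str, torch.Tensor],
    alpha: float = 1.0,
    dora_scale: torch.Tensor | None = None,
) -> WeightAdapterBase | None:
    # Probe each registered adapter class; the first one that recognizes the key wins.
    for adapter_cls in adapters:
        adapter = adapter_cls.load(key, lora_sd, alpha, dora_scale)
        if adapter is not None:
            return adapter
    return None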

View File

@@ -12,12 +12,20 @@ class WeightAdapterBase:
weights: list[torch.Tensor]
@classmethod
def load(cls, x: str, lora: dict[str, torch.Tensor]) -> Optional["WeightAdapterBase"]:
def load(cls, x: str, lora: dict[str, torch.Tensor], alpha: float, dora_scale: torch.Tensor) -> Optional["WeightAdapterBase"]:
raise NotImplementedError
def to_train(self) -> "WeightAdapterTrainBase":
raise NotImplementedError
@classmethod
def create_train(cls, weight, *args) -> "WeightAdapterTrainBase":
"""
weight: The original weight tensor to be modified.
*args: Additional arguments for configuration, such as rank, alpha etc.
"""
raise NotImplementedError
def calculate_weight(
self,
weight,
@@ -33,10 +41,22 @@ class WeightAdapterBase:
class WeightAdapterTrainBase(nn.Module):
# We follow the scheme of PR #7032
def __init__(self):
super().__init__()
# [TODO] Collaborate with LoRA training PR #7032
def __call__(self, w):
"""
w: The original weight tensor to be modified.
"""
raise NotImplementedError
def passive_memory_usage(self):
raise NotImplementedError("passive_memory_usage is not implemented")
def move_to(self, device):
self.to(device)
return self.passive_memory_usage()
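
A minimal sketch of the intended call contract (the wrapper class below is hypothetical and not part of this commit; it assumes a concrete adapter module has already been produced by some create_train() implementation): the base weight stays frozen and the adapter recomputes the effective weight on every forward pass.

import torch
from comfy.weight_adapter import WeightAdapterTrainBase  # assumed package path

class TrainedLinear(torch.nn.Module):
    # Hypothetical wrapper: applies a trainable weight adapter to a frozen Linear layer.
    def __init__(self, linear: torch.nn.Linear, adapter: WeightAdapterTrainBase):
        super().__init__()
        self.linear = linear.requires_grad_(False)  # base weights stay frozen
        self.adapter = adapter                      # only these parameters are trained
    def forward(self, x):
        w = self.adapter(self.linear.weight)        # adapter.__call__(w) returns the modified weight
        return torch.nn.functional.linear(x, w, self.linear.bias)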
def weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function):
@@ -102,3 +122,14 @@ def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Ten
padded_tensor[new_slices] = tensor[orig_slices]
return padded_tensor
def tucker_weight_from_conv(up, down, mid):
up = up.reshape(up.size(0), up.size(1))
down = down.reshape(down.size(0), down.size(1))
return torch.einsum("m n ..., i m, n j -> i j ...", mid, up, down)
def tucker_weight(wa, wb, t):
temp = torch.einsum("i j ..., j r -> i r ...", t, wb)
return torch.einsum("i j ..., i r -> r j ...", temp, wa)
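
A shape-level sanity check of the Tucker helper (a sketch with hypothetical dimensions: out_dim=8, in_dim=4, rank=2, 3x3 kernel), showing that it rebuilds a full conv weight from the two low-rank factors and the core tensor:

import torch
from comfy.weight_adapter.base import tucker_weight_from_conv  # assumed module path

up = torch.randn(8, 2, 1, 1)    # lora_up conv weight: (out_dim, rank, 1, 1)
down = torch.randn(2, 4, 1, 1)  # lora_down conv weight: (rank, in_dim, 1, 1)
mid = torch.randn(2, 2, 3, 3)   # Tucker core: (rank, rank, kh, kw)
w = tucker_weight_from_conv(up, down, mid)
assert w.shape == (8, 4, 3, 3)  # reconstructed conv weight: (out_dim, in_dim, kh, kw)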

View File

@@ -3,7 +3,56 @@ from typing import Optional
import torch
import comfy.model_management
from .base import WeightAdapterBase, weight_decompose, pad_tensor_to_shape
from .base import (
WeightAdapterBase,
WeightAdapterTrainBase,
weight_decompose,
pad_tensor_to_shape,
tucker_weight_from_conv,
)
class LoraDiff(WeightAdapterTrainBase):
def __init__(self, weights):
super().__init__()
mat1, mat2, alpha, mid, dora_scale, reshape = weights
out_dim, rank = mat1.shape[0], mat1.shape[1]
rank, in_dim = mat2.shape[0], mat2.shape[1]
if mid is not None:
convdim = mid.ndim - 2
layer = (
torch.nn.Conv1d,
torch.nn.Conv2d,
torch.nn.Conv3d
)[convdim]
else:
layer = torch.nn.Linear
self.lora_up = layer(rank, out_dim, bias=False)
self.lora_down = layer(in_dim, rank, bias=False)
self.lora_up.weight.data.copy_(mat1)
self.lora_down.weight.data.copy_(mat2)
if mid is not None:
self.lora_mid = layer(mid, rank, bias=False)
self.lora_mid.weight.data.copy_(mid)
else:
self.lora_mid = None
self.rank = rank
self.alpha = torch.nn.Parameter(torch.tensor(alpha), requires_grad=False)
def __call__(self, w):
org_dtype = w.dtype
if self.lora_mid is None:
diff = self.lora_up.weight @ self.lora_down.weight
else:
diff = tucker_weight_from_conv(
self.lora_up.weight, self.lora_down.weight, self.lora_mid.weight
)
scale = self.alpha / self.rank
weight = w + scale * diff.reshape(w.shape)
return weight.to(org_dtype)
def passive_memory_usage(self):
return sum(param.numel() * param.element_size() for param in self.parameters())
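
For the Linear case, LoraDiff's output should reduce to W + (alpha / rank) * (mat1 @ mat2); a small numeric check (a sketch, assuming LoraDiff is importable from comfy.weight_adapter.lora):

import torch
from comfy.weight_adapter.lora import LoraDiff  # assumed module path

out_dim, in_dim, rank, alpha = 16, 8, 4, 2.0
w = torch.randn(out_dim, in_dim)
mat1 = torch.randn(out_dim, rank)   # lora_up factor
mat2 = torch.randn(rank, in_dim)    # lora_down factor
module = LoraDiff((mat1, mat2, alpha, None, None, None))  # no mid, dora_scale, or reshape
expected = w + (alpha / rank) * (mat1 @ mat2)
assert torch.allclose(module(w), expected, atol=1e-6)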
class LoRAAdapter(WeightAdapterBase):
@@ -13,6 +62,21 @@ class LoRAAdapter(WeightAdapterBase):
self.loaded_keys = loaded_keys
self.weights = weights
@classmethod
def create_train(cls, weight, rank=1, alpha=1.0):
out_dim = weight.shape[0]
in_dim = weight.shape[1:].numel()
mat1 = torch.empty(out_dim, rank, device=weight.device, dtype=weight.dtype)
mat2 = torch.empty(rank, in_dim, device=weight.device, dtype=weight.dtype)
torch.nn.init.kaiming_uniform_(mat1, a=5**0.5)
torch.nn.init.constant_(mat2, 0.0)
return LoraDiff(
(mat1, mat2, alpha, None, None, None)
)
def to_train(self):
return LoraDiff(self.weights)
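
Putting the new pieces together, a rough end-to-end sketch of one training step (illustrative only; the hyperparameters and the package import path are assumptions, and the actual trainer node added by this commit may wire things differently):

import torch
from comfy.weight_adapter import LoRAAdapter  # assumed package path

layer = torch.nn.Linear(32, 64)
layer.requires_grad_(False)                    # the base layer stays frozen

lora = LoRAAdapter.create_train(layer.weight, rank=8, alpha=8.0)   # returns a LoraDiff module
optim = torch.optim.AdamW([p for p in lora.parameters() if p.requires_grad], lr=1e-3)

x = torch.randn(4, 32)
target = torch.randn(4, 64)
w = lora(layer.weight)                         # W + (alpha / rank) * (up @ down)
pred = torch.nn.functional.linear(x, w, layer.bias)
loss = torch.nn.functional.mse_loss(pred, target)
loss.backward()                                # gradients flow only into the LoRA factors
optim.step()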
@classmethod
def load(
cls,