Merge branch 'master' into v3-definition - async v3 nodes do not currently work, but I will fix that in the next v3 PR

2025-09-15 05:57:57 +00:00 · 2025-07-18 14:14:02 -07:00
parent de0901bd02 7f492522b6
commit fd9c34a3eb
37 changed files with 2437 additions and 480 deletions
--- a/comfy_extras/nodes_cfg.py
+++ b/comfy_extras/nodes_cfg.py
@@ -40,6 +40,33 @@ class CFGZeroStar:
        m.set_model_sampler_post_cfg_function(cfg_zero_star)
        return (m, )

+class CFGNorm:  # Node class whose `patch` method installs a norm-limiting post-CFG function on a cloned model.
+    @classmethod
+    def INPUT_TYPES(s):  # Declares two required inputs: a MODEL and a FLOAT `strength` (default 1.0, range 0.0-100.0).
+        return {"required": {"model": ("MODEL",),
+                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}),
+                            }}
+    RETURN_TYPES = ("MODEL",)
+    RETURN_NAMES = ("patched_model",)
+    FUNCTION = "patch"  # names the method defined below; presumably the entry point the host framework calls -- confirm
+    CATEGORY = "advanced/guidance"
+    EXPERIMENTAL = True
+
+    def patch(self, model, strength):  # Returns a 1-tuple holding a clone of `model` with `cfg_norm` set as its post-CFG function.
+        m = model.clone()  # the function is installed on this clone, not on the object passed in
+        def cfg_norm(args):  # Rescales args["denoised"], using args["cond_denoised"] as the norm reference.
+            cond_p = args['cond_denoised']
+            pred_text_ = args["denoised"]
+
+            norm_full_cond = torch.norm(cond_p, dim=1, keepdim=True)  # norm taken over dim 1 (assumed to be the channel axis -- TODO confirm)
+            norm_pred_text = torch.norm(pred_text_, dim=1, keepdim=True)
+            scale = (norm_full_cond / (norm_pred_text + 1e-8)).clamp(min=0.0, max=1.0)  # 1e-8 guards the division; scale is capped at 1, so this factor never enlarges values
+            return pred_text_ * scale * strength  # `strength` is applied on top of the capped scale
+
+        m.set_model_sampler_post_cfg_function(cfg_norm)
+        return (m, )
+
 NODE_CLASS_MAPPINGS = {
-    "CFGZeroStar": CFGZeroStar
+    "CFGZeroStar": CFGZeroStar,
+    "CFGNorm": CFGNorm,  # new entry: maps the "CFGNorm" key to the CFGNorm class
 }
--- a/comfy_extras/nodes_fresca.py
+++ b/comfy_extras/nodes_fresca.py
@@ -71,8 +71,11 @@ class FreSca:
    DESCRIPTION = "Applies frequency-dependent scaling to the guidance"
    def patch(self, model, scale_low, scale_high, freq_cutoff):
        def custom_cfg_function(args):
-            cond = args["conds_out"][0]
-            uncond = args["conds_out"][1]
+            conds_out = args["conds_out"]
+            if len(conds_out) <= 1 or None in args["conds"][:2]:
+                return conds_out
+            cond = conds_out[0]
+            uncond = conds_out[1]

            guidance = cond - uncond
            filtered_guidance = Fourier_filter(
@@ -83,7 +86,7 @@ class FreSca:
            )
            filtered_cond = filtered_guidance + uncond

-            return [filtered_cond, uncond]
+            return [filtered_cond, uncond] + conds_out[2:]

        m = model.clone()
        m.set_model_sampler_pre_cfg_function(custom_cfg_function)
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@@ -247,7 +247,7 @@ class MaskComposite:
        visible_width, visible_height = (right - left, bottom - top,)

        source_portion = source[:, :visible_height, :visible_width]
-        destination_portion = destination[:, top:bottom, left:right]
+        destination_portion = output[:, top:bottom, left:right]

        if operation == "multiply":
            output[:, top:bottom, left:right] = destination_portion * source_portion
--- a/comfy_extras/nodes_pixart.py
+++ b/comfy_extras/nodes_pixart.py
@@ -1,24 +1,24 @@
-from nodes import MAX_RESOLUTION
-
-class CLIPTextEncodePixArtAlpha:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            # "aspect_ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
-            "text": ("STRING", {"multiline": True, "dynamicPrompts": True}), "clip": ("CLIP", ),
-            }}
-
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "encode"
-    CATEGORY = "advanced/conditioning"
-    DESCRIPTION = "Encodes text and sets the resolution conditioning for PixArt Alpha. Does not apply to PixArt Sigma."
-
-    def encode(self, clip, width, height, text):
-        tokens = clip.tokenize(text)
-        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height}),)
-
-NODE_CLASS_MAPPINGS = {
-    "CLIPTextEncodePixArtAlpha": CLIPTextEncodePixArtAlpha,
-}
+from nodes import MAX_RESOLUTION
+
+class CLIPTextEncodePixArtAlpha:  # Node class that tokenizes text with a CLIP object and encodes it, passing width/height along via add_dict.
+    @classmethod
+    def INPUT_TYPES(s):  # Declares four required inputs: width, height, text and clip.
+        return {"required": {
+            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),  # NOTE(review): INT input, but the default is the float 1024.0 -- confirm this is intended
+            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),  # NOTE(review): same float default on an INT input
+            # "aspect_ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+            "text": ("STRING", {"multiline": True, "dynamicPrompts": True}), "clip": ("CLIP", ),
+            }}
+
+    RETURN_TYPES = ("CONDITIONING",)
+    FUNCTION = "encode"  # names the method defined below
+    CATEGORY = "advanced/conditioning"
+    DESCRIPTION = "Encodes text and sets the resolution conditioning for PixArt Alpha. Does not apply to PixArt Sigma."
+
+    def encode(self, clip, width, height, text):  # Returns a 1-tuple holding the result of clip.encode_from_tokens_scheduled.
+        tokens = clip.tokenize(text)
+        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height}),)  # width/height are forwarded unchanged in add_dict
+
+NODE_CLASS_MAPPINGS = {
+    "CLIPTextEncodePixArtAlpha": CLIPTextEncodePixArtAlpha,
+}
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -23,38 +23,78 @@ from comfy.comfy_types.node_typing import IO
 from comfy.weight_adapter import adapters


+def make_batch_extra_option_dict(d, indicies, full_size=None):  # Builds a new dict from `d` in which dataset-sized tensors/lists are reduced to the entries listed in `indicies`.
+    new_dict = {}
+    for k, v in d.items():
+        newv = v  # default: the value is passed through unchanged (same object)
+        if isinstance(v, dict):
+            newv = make_batch_extra_option_dict(v, indicies, full_size=full_size)  # nested dicts are processed recursively
+        elif isinstance(v, torch.Tensor):
+            if full_size is None or v.size(0) == full_size:  # index only tensors whose first dim equals full_size (every tensor when full_size is None)
+                newv = v[indicies]
+        elif isinstance(v, (list, tuple)) and len(v) == full_size:  # NOTE(review): with full_size=None this never matches, unlike the tensor branch above
+            newv = [v[i] for i in indicies]  # the result is a list even when `v` was a tuple
+        new_dict[k] = newv
+    return new_dict
+
+
 class TrainSampler(comfy.samplers.Sampler):

-    def __init__(self, loss_fn, optimizer, loss_callback=None):
+    def __init__(self, loss_fn, optimizer, loss_callback=None, batch_size=1, total_steps=1, seed=0, training_dtype=torch.bfloat16):  # the four new keyword arguments configure the training loop that now runs inside sample()
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.loss_callback = loss_callback
+        self.batch_size = batch_size
+        self.total_steps = total_steps
+        self.seed = seed
+        self.training_dtype = training_dtype

    def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
-        self.optimizer.zero_grad()
-        noise = model_wrap.inner_model.model_sampling.noise_scaling(sigmas, noise, latent_image, False)
-        latent = model_wrap.inner_model.model_sampling.noise_scaling(
-            torch.zeros_like(sigmas),
-            torch.zeros_like(noise, requires_grad=True),
-            latent_image,
-            False
-        )
+        cond = model_wrap.conds["positive"]  # keep a reference to the full positive-conditioning list; a per-batch subset is written back inside the loop
+        dataset_size = sigmas.size(0)  # the dataset size is read from the length of `sigmas`; its values are not used in this method
+        torch.cuda.empty_cache()
+        for i in (pbar:=tqdm.trange(self.total_steps, desc="Training LoRA", smoothing=0.01, disable=not comfy.utils.PROGRESS_BAR_ENABLED)):  # one optimizer step per iteration
+            noisegen = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(self.seed + i * 1000)  # a different noise seed for every step
+            indicies = torch.randperm(dataset_size)[:self.batch_size].tolist()  # up to batch_size distinct dataset indices in random order

-        # Ensure model is in training mode and computing gradients
-        # x0 pred
-        denoised = model_wrap(noise, sigmas, **extra_args)
-        try:
-            loss = self.loss_fn(denoised, latent.clone())
-        except RuntimeError as e:
-            if "does not require grad and does not have a grad_fn" in str(e):
-                logging.info("WARNING: This is likely due to the model is loaded in inference mode.")
-        loss.backward()
-        if self.loss_callback:
-            self.loss_callback(loss.item())
+            batch_latent = torch.stack([latent_image[i] for i in indicies])  # the comprehension's `i` is local to it and does not rebind the step counter
+            batch_noise = noisegen.generate_noise({"samples": batch_latent}).to(batch_latent.device)
+            batch_sigmas = [  # one independently drawn random sigma per selected sample
+                model_wrap.inner_model.model_sampling.percent_to_sigma(
+                    torch.rand((1,)).item()
+                ) for _ in range(min(self.batch_size, dataset_size))
+            ]
+            batch_sigmas = torch.tensor(batch_sigmas).to(batch_latent.device)

-        self.optimizer.step()
-        # torch.cuda.memory._dump_snapshot("trainn.pickle")
-        # torch.cuda.memory._record_memory_history(enabled=None)
+            xt = model_wrap.inner_model.model_sampling.noise_scaling(  # model input built from the batch latents, noise and sigmas (presumably the noised latent -- confirm)
+                batch_sigmas,
+                batch_noise,
+                batch_latent,
+                False
+            )
+            x0 = model_wrap.inner_model.model_sampling.noise_scaling(  # same call with zero sigmas and zero noise; used below as the loss target
+                torch.zeros_like(batch_sigmas),
+                torch.zeros_like(batch_noise),
+                batch_latent,
+                False
+            )
+
+            model_wrap.conds["positive"] = [  # NOTE(review): overwrites model_wrap.conds in place; the full list in `cond` is not written back after the loop
+                cond[i] for i in indicies
+            ]
+            batch_extra_args = make_batch_extra_option_dict(extra_args, indicies, full_size=dataset_size)  # reduce dataset-sized entries of extra_args to the same indices
+
+            with torch.autocast(xt.device.type, dtype=self.training_dtype):  # forward pass and loss run under autocast; backward() runs outside the block
+                x0_pred = model_wrap(xt, batch_sigmas, **batch_extra_args)
+                loss = self.loss_fn(x0_pred, x0)
+            loss.backward()
+            if self.loss_callback:
+                self.loss_callback(loss.item())
+            pbar.set_postfix({"loss": f"{loss.item():.4f}"})
+
+            self.optimizer.step()
+            self.optimizer.zero_grad()  # clear gradients after the step so the next iteration starts from zero
+        torch.cuda.empty_cache()
        return torch.zeros_like(latent_image)


@@ -584,36 +624,34 @@ class TrainLoraNode:
            loss_map = {"loss": []}
            def loss_callback(loss):
                loss_map["loss"].append(loss)
-                pbar.set_postfix({"loss": f"{loss:.4f}"})
            train_sampler = TrainSampler(
-                criterion, optimizer, loss_callback=loss_callback
+                criterion,
+                optimizer,
+                loss_callback=loss_callback,
+                batch_size=batch_size,
+                total_steps=steps,
+                seed=seed,
+                training_dtype=dtype
            )
            guider = comfy_extras.nodes_custom_sampler.Guider_Basic(mp)
            guider.set_conds(positive)  # Set conditioning from input

-            # yoland: this currently resize to the first image in the dataset
-
            # Training loop
-            torch.cuda.empty_cache()
            try:
-                for step in (pbar:=tqdm.trange(steps, desc="Training LoRA", smoothing=0.01, disable=not comfy.utils.PROGRESS_BAR_ENABLED)):
-                    # Generate random sigma
-                    sigmas = [mp.model.model_sampling.percent_to_sigma(
-                        torch.rand((1,)).item()
-                    ) for _ in range(min(batch_size, num_images))]
-                    sigmas = torch.tensor(sigmas)
-
-                    noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(step * 1000 + seed)
-
-                    indices = torch.randperm(num_images)[:batch_size]
-                    batch_latent = latents[indices].clone()
-                    guider.set_conds([positive[i] for i in indices])  # Set conditioning from input
-                    guider.sample(noise.generate_noise({"samples": batch_latent}), batch_latent, train_sampler, sigmas, seed=noise.seed)
+                # Generate dummy sigmas and noise
+                sigmas = torch.tensor(range(num_images))
+                noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed)
+                guider.sample(
+                    noise.generate_noise({"samples": latents}),
+                    latents,
+                    train_sampler,
+                    sigmas,
+                    seed=noise.seed
+                )
            finally:
                for m in mp.model.modules():
                    unpatch(m)
            del train_sampler, optimizer
-            torch.cuda.empty_cache()

            for adapter in all_weight_adapters:
                adapter.requires_grad_(False)