diff --git a/comfy_extras/v3/nodes_ace.py b/comfy_extras/v3/nodes_ace.py index 988e0ed5a..fdad8f800 100644 --- a/comfy_extras/v3/nodes_ace.py +++ b/comfy_extras/v3/nodes_ace.py @@ -52,6 +52,6 @@ class EmptyAceStepLatentAudio(io.ComfyNode): NODES_LIST: list[type[io.ComfyNode]] = [ - TextEncodeAceStepAudio, EmptyAceStepLatentAudio, + TextEncodeAceStepAudio, ] diff --git a/comfy_extras/v3/nodes_advanced_samplers.py b/comfy_extras/v3/nodes_advanced_samplers.py index 91512effb..ecbe7094f 100644 --- a/comfy_extras/v3/nodes_advanced_samplers.py +++ b/comfy_extras/v3/nodes_advanced_samplers.py @@ -122,7 +122,7 @@ class SamplerEulerCFGpp(io.ComfyNode): return io.NodeOutput(sampler) -NODES_LIST = [ - SamplerLCMUpscale, +NODES_LIST: list[type[io.ComfyNode]] = [ SamplerEulerCFGpp, + SamplerLCMUpscale, ] diff --git a/comfy_extras/v3/nodes_align_your_steps.py b/comfy_extras/v3/nodes_align_your_steps.py index c2a211c99..acb71c631 100644 --- a/comfy_extras/v3/nodes_align_your_steps.py +++ b/comfy_extras/v3/nodes_align_your_steps.py @@ -5,6 +5,18 @@ import torch from comfy_api.latest import io + +def loglinear_interp(t_steps, num_steps): + """Performs log-linear interpolation of a given array of decreasing numbers.""" + xs = np.linspace(0, 1, len(t_steps)) + ys = np.log(t_steps[::-1]) + + new_xs = np.linspace(0, 1, num_steps) + new_ys = np.interp(new_xs, xs, ys) + + return np.exp(new_ys)[::-1].copy() + + NOISE_LEVELS = { "SD1": [ 14.6146412293, @@ -36,17 +48,6 @@ NOISE_LEVELS = { } -def loglinear_interp(t_steps, num_steps): - """Performs log-linear interpolation of a given array of decreasing numbers.""" - xs = np.linspace(0, 1, len(t_steps)) - ys = np.log(t_steps[::-1]) - - new_xs = np.linspace(0, 1, num_steps) - new_ys = np.interp(new_xs, xs, ys) - - return np.exp(new_ys)[::-1].copy() - - class AlignYourStepsScheduler(io.ComfyNode): @classmethod def define_schema(cls) -> io.Schema: @@ -78,6 +79,6 @@ class AlignYourStepsScheduler(io.ComfyNode): return io.NodeOutput(torch.FloatTensor(sigmas)) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ AlignYourStepsScheduler, ] diff --git a/comfy_extras/v3/nodes_apg.py b/comfy_extras/v3/nodes_apg.py index 961bdddb3..f9fc208d0 100644 --- a/comfy_extras/v3/nodes_apg.py +++ b/comfy_extras/v3/nodes_apg.py @@ -93,6 +93,6 @@ class APG(io.ComfyNode): return io.NodeOutput(m) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ APG, ] diff --git a/comfy_extras/v3/nodes_attention_multiply.py b/comfy_extras/v3/nodes_attention_multiply.py index 9fb86714f..dfcb85568 100644 --- a/comfy_extras/v3/nodes_attention_multiply.py +++ b/comfy_extras/v3/nodes_attention_multiply.py @@ -131,9 +131,9 @@ class UNetTemporalAttentionMultiply(io.ComfyNode): return io.NodeOutput(m) -NODES_LIST = [ - UNetSelfAttentionMultiply, - UNetCrossAttentionMultiply, +NODES_LIST: list[type[io.ComfyNode]] = [ CLIPAttentionMultiply, + UNetCrossAttentionMultiply, + UNetSelfAttentionMultiply, UNetTemporalAttentionMultiply, ] diff --git a/comfy_extras/v3/nodes_audio.py b/comfy_extras/v3/nodes_audio.py index 994863a42..089c2cb73 100644 --- a/comfy_extras/v3/nodes_audio.py +++ b/comfy_extras/v3/nodes_audio.py @@ -3,6 +3,7 @@ from __future__ import annotations import hashlib import os +import av import torch import torchaudio @@ -12,6 +13,28 @@ import node_helpers from comfy_api.latest import io, ui +class EmptyLatentAudio(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="EmptyLatentAudio_V3", + category="latent/audio", + inputs=[ + io.Float.Input("seconds", 
default=47.6, min=1.0, max=1000.0, step=0.1), + io.Int.Input( + "batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch." + ), + ], + outputs=[io.Latent.Output()], + ) + + @classmethod + def execute(cls, seconds, batch_size) -> io.NodeOutput: + length = round((seconds * 44100 / 2048) / 2) * 2 + latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device()) + return io.NodeOutput({"samples": latent, "type": "audio"}) + + class ConditioningStableAudio(io.ComfyNode): @classmethod def define_schema(cls): @@ -42,83 +65,71 @@ class ConditioningStableAudio(io.ComfyNode): ) -class EmptyLatentAudio(io.ComfyNode): +class VAEEncodeAudio(io.ComfyNode): @classmethod def define_schema(cls): return io.Schema( - node_id="EmptyLatentAudio_V3", + node_id="VAEEncodeAudio_V3", category="latent/audio", inputs=[ - io.Float.Input("seconds", default=47.6, min=1.0, max=1000.0, step=0.1), - io.Int.Input( - id="batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch." - ), + io.Audio.Input("audio"), + io.Vae.Input("vae"), ], outputs=[io.Latent.Output()], ) @classmethod - def execute(cls, seconds, batch_size) -> io.NodeOutput: - length = round((seconds * 44100 / 2048) / 2) * 2 - latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device()) - return io.NodeOutput({"samples": latent, "type": "audio"}) + def execute(cls, vae, audio) -> io.NodeOutput: + sample_rate = audio["sample_rate"] + if 44100 != sample_rate: + waveform = torchaudio.functional.resample(audio["waveform"], sample_rate, 44100) + else: + waveform = audio["waveform"] + return io.NodeOutput({"samples": vae.encode(waveform.movedim(1, -1))}) -class LoadAudio(io.ComfyNode): +class VAEDecodeAudio(io.ComfyNode): @classmethod def define_schema(cls): return io.Schema( - node_id="LoadAudio_V3", # frontend expects "LoadAudio" to work - display_name="Load Audio _V3", # frontend ignores "display_name" for this node - category="audio", + node_id="VAEDecodeAudio_V3", + category="latent/audio", inputs=[ - io.Combo.Input("audio", upload=io.UploadType.audio, options=cls.get_files_options()), + io.Latent.Input("samples"), + io.Vae.Input("vae"), ], outputs=[io.Audio.Output()], ) @classmethod - def get_files_options(cls) -> list[str]: - input_dir = folder_paths.get_input_directory() - return sorted(folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"])) - - @classmethod - def execute(cls, audio) -> io.NodeOutput: - waveform, sample_rate = torchaudio.load(folder_paths.get_annotated_filepath(audio)) - return io.NodeOutput({"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}) - - @classmethod - def fingerprint_inputs(s, audio): - image_path = folder_paths.get_annotated_filepath(audio) - m = hashlib.sha256() - with open(image_path, "rb") as f: - m.update(f.read()) - return m.digest().hex() - - @classmethod - def validate_inputs(s, audio): - if not folder_paths.exists_annotated_filepath(audio): - return "Invalid audio file: {}".format(audio) - return True + def execute(cls, vae, samples) -> io.NodeOutput: + audio = vae.decode(samples["samples"]).movedim(-1, 1) + std = torch.std(audio, dim=[1, 2], keepdim=True) * 5.0 + std[std < 1.0] = 1.0 + audio /= std + return io.NodeOutput({"waveform": audio, "sample_rate": 44100}) -class PreviewAudio(io.ComfyNode): +class SaveAudio(io.ComfyNode): @classmethod def define_schema(cls): return io.Schema( - node_id="PreviewAudio_V3", # frontend expects 
"PreviewAudio" to work - display_name="Preview Audio _V3", # frontend ignores "display_name" for this node + node_id="SaveAudio_V3", # frontend expects "SaveAudio" to work + display_name="Save Audio _V3", # frontend ignores "display_name" for this node category="audio", inputs=[ io.Audio.Input("audio"), + io.String.Input("filename_prefix", default="audio/ComfyUI"), ], hidden=[io.Hidden.prompt, io.Hidden.extra_pnginfo], is_output_node=True, ) @classmethod - def execute(cls, audio) -> io.NodeOutput: - return io.NodeOutput(ui=ui.PreviewAudio(audio, cls=cls)) + def execute(cls, audio, filename_prefix="ComfyUI", format="flac") -> io.NodeOutput: + return io.NodeOutput( + ui=ui.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=format) + ) class SaveAudioMP3(io.ComfyNode): @@ -171,71 +182,99 @@ class SaveAudioOpus(io.ComfyNode): ) -class SaveAudio(io.ComfyNode): +class PreviewAudio(io.ComfyNode): @classmethod def define_schema(cls): return io.Schema( - node_id="SaveAudio_V3", # frontend expects "SaveAudio" to work - display_name="Save Audio _V3", # frontend ignores "display_name" for this node + node_id="PreviewAudio_V3", # frontend expects "PreviewAudio" to work + display_name="Preview Audio _V3", # frontend ignores "display_name" for this node category="audio", inputs=[ io.Audio.Input("audio"), - io.String.Input("filename_prefix", default="audio/ComfyUI"), ], hidden=[io.Hidden.prompt, io.Hidden.extra_pnginfo], is_output_node=True, ) @classmethod - def execute(cls, audio, filename_prefix="ComfyUI", format="flac") -> io.NodeOutput: - return io.NodeOutput( - ui=ui.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=format) - ) + def execute(cls, audio) -> io.NodeOutput: + return io.NodeOutput(ui=ui.PreviewAudio(audio, cls=cls)) -class VAEDecodeAudio(io.ComfyNode): +class LoadAudio(io.ComfyNode): @classmethod def define_schema(cls): return io.Schema( - node_id="VAEDecodeAudio_V3", - category="latent/audio", + node_id="LoadAudio_V3", # frontend expects "LoadAudio" to work + display_name="Load Audio _V3", # frontend ignores "display_name" for this node + category="audio", inputs=[ - io.Latent.Input("samples"), - io.Vae.Input("vae"), + io.Combo.Input("audio", upload=io.UploadType.audio, options=cls.get_files_options()), ], outputs=[io.Audio.Output()], ) @classmethod - def execute(cls, vae, samples) -> io.NodeOutput: - audio = vae.decode(samples["samples"]).movedim(-1, 1) - std = torch.std(audio, dim=[1, 2], keepdim=True) * 5.0 - std[std < 1.0] = 1.0 - audio /= std - return io.NodeOutput({"waveform": audio, "sample_rate": 44100}) - - -class VAEEncodeAudio(io.ComfyNode): - @classmethod - def define_schema(cls): - return io.Schema( - node_id="VAEEncodeAudio_V3", - category="latent/audio", - inputs=[ - io.Audio.Input("audio"), - io.Vae.Input("vae"), - ], - outputs=[io.Latent.Output()], - ) + def get_files_options(cls) -> list[str]: + input_dir = folder_paths.get_input_directory() + return sorted(folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"])) @classmethod - def execute(cls, vae, audio) -> io.NodeOutput: - sample_rate = audio["sample_rate"] - if 44100 != sample_rate: - waveform = torchaudio.functional.resample(audio["waveform"], sample_rate, 44100) - else: - waveform = audio["waveform"] - return io.NodeOutput({"samples": vae.encode(waveform.movedim(1, -1))}) + def load(cls, filepath: str) -> tuple[torch.Tensor, int]: + with av.open(filepath) as af: + if not af.streams.audio: + raise 
ValueError("No audio stream found in the file.") + + stream = af.streams.audio[0] + sr = stream.codec_context.sample_rate + n_channels = stream.channels + + frames = [] + length = 0 + for frame in af.decode(streams=stream.index): + buf = torch.from_numpy(frame.to_ndarray()) + if buf.shape[0] != n_channels: + buf = buf.view(-1, n_channels).t() + + frames.append(buf) + length += buf.shape[1] + + if not frames: + raise ValueError("No audio frames decoded.") + + wav = torch.cat(frames, dim=1) + wav = cls.f32_pcm(wav) + return wav, sr + + @classmethod + def f32_pcm(cls, wav: torch.Tensor) -> torch.Tensor: + """Convert audio to float 32 bits PCM format.""" + if wav.dtype.is_floating_point: + return wav + elif wav.dtype == torch.int16: + return wav.float() / (2 ** 15) + elif wav.dtype == torch.int32: + return wav.float() / (2 ** 31) + raise ValueError(f"Unsupported wav dtype: {wav.dtype}") + + @classmethod + def execute(cls, audio) -> io.NodeOutput: + waveform, sample_rate = cls.load(folder_paths.get_annotated_filepath(audio)) + return io.NodeOutput({"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}) + + @classmethod + def fingerprint_inputs(s, audio): + image_path = folder_paths.get_annotated_filepath(audio) + m = hashlib.sha256() + with open(image_path, "rb") as f: + m.update(f.read()) + return m.digest().hex() + + @classmethod + def validate_inputs(s, audio): + if not folder_paths.exists_annotated_filepath(audio): + return "Invalid audio file: {}".format(audio) + return True NODES_LIST: list[type[io.ComfyNode]] = [ @@ -243,9 +282,9 @@ NODES_LIST: list[type[io.ComfyNode]] = [ EmptyLatentAudio, LoadAudio, PreviewAudio, + SaveAudio, SaveAudioMP3, SaveAudioOpus, - SaveAudio, VAEDecodeAudio, VAEEncodeAudio, ] diff --git a/comfy_extras/v3/nodes_camera_trajectory.py b/comfy_extras/v3/nodes_camera_trajectory.py index edc159591..40fb1dcf9 100644 --- a/comfy_extras/v3/nodes_camera_trajectory.py +++ b/comfy_extras/v3/nodes_camera_trajectory.py @@ -212,6 +212,6 @@ class WanCameraEmbedding(io.ComfyNode): return io.NodeOutput(control_camera_video, width, height, length) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ WanCameraEmbedding, ] diff --git a/comfy_extras/v3/nodes_canny.py b/comfy_extras/v3/nodes_canny.py index e24b0df38..0b68db381 100644 --- a/comfy_extras/v3/nodes_canny.py +++ b/comfy_extras/v3/nodes_canny.py @@ -27,6 +27,6 @@ class Canny(io.ComfyNode): return io.NodeOutput(img_out) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ Canny, ] diff --git a/comfy_extras/v3/nodes_cfg.py b/comfy_extras/v3/nodes_cfg.py index 66ec27f9a..e8e84a2bd 100644 --- a/comfy_extras/v3/nodes_cfg.py +++ b/comfy_extras/v3/nodes_cfg.py @@ -5,6 +5,7 @@ import torch from comfy_api.latest import io +# https://github.com/WeichenFan/CFG-Zero-star def optimized_scale(positive, negative): positive_flat = positive.reshape(positive.shape[0], -1) negative_flat = negative.reshape(negative.shape[0], -1) @@ -21,6 +22,36 @@ def optimized_scale(positive, negative): return st_star.reshape([positive.shape[0]] + [1] * (positive.ndim - 1)) +class CFGZeroStar(io.ComfyNode): + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="CFGZeroStar_V3", + category="advanced/guidance", + inputs=[ + io.Model.Input("model"), + ], + outputs=[io.Model.Output(display_name="patched_model")], + ) + + @classmethod + def execute(cls, model) -> io.NodeOutput: + m = model.clone() + + def cfg_zero_star(args): + guidance_scale = args['cond_scale'] + x = args['input'] + cond_p = args['cond_denoised'] 
+ uncond_p = args['uncond_denoised'] + out = args["denoised"] + alpha = optimized_scale(x - cond_p, x - uncond_p) + + return out + uncond_p * (alpha - 1.0) + guidance_scale * uncond_p * (1.0 - alpha) + + m.set_model_sampler_post_cfg_function(cfg_zero_star) + return io.NodeOutput(m) + + class CFGNorm(io.ComfyNode): @classmethod def define_schema(cls) -> io.Schema: @@ -52,37 +83,7 @@ class CFGNorm(io.ComfyNode): return io.NodeOutput(m) -class CFGZeroStar(io.ComfyNode): - @classmethod - def define_schema(cls) -> io.Schema: - return io.Schema( - node_id="CFGZeroStar_V3", - category="advanced/guidance", - inputs=[ - io.Model.Input("model"), - ], - outputs=[io.Model.Output(display_name="patched_model")], - ) - - @classmethod - def execute(cls, model) -> io.NodeOutput: - m = model.clone() - - def cfg_zero_star(args): - guidance_scale = args['cond_scale'] - x = args['input'] - cond_p = args['cond_denoised'] - uncond_p = args['uncond_denoised'] - out = args["denoised"] - alpha = optimized_scale(x - cond_p, x - uncond_p) - - return out + uncond_p * (alpha - 1.0) + guidance_scale * uncond_p * (1.0 - alpha) - - m.set_model_sampler_post_cfg_function(cfg_zero_star) - return io.NodeOutput(m) - - -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ CFGNorm, CFGZeroStar, ] diff --git a/comfy_extras/v3/nodes_clip_sdxl.py b/comfy_extras/v3/nodes_clip_sdxl.py index 54b83dc16..3d05b7595 100644 --- a/comfy_extras/v3/nodes_clip_sdxl.py +++ b/comfy_extras/v3/nodes_clip_sdxl.py @@ -4,6 +4,31 @@ import nodes from comfy_api.latest import io +class CLIPTextEncodeSDXLRefiner(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="CLIPTextEncodeSDXLRefiner_V3", + category="advanced/conditioning", + inputs=[ + io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01), + io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), + io.Int.Input("height", default=1024, min=0, max=nodes.MAX_RESOLUTION), + io.String.Input("text", multiline=True, dynamic_prompts=True), + io.Clip.Input("clip"), + ], + outputs=[io.Conditioning.Output()], + ) + + @classmethod + def execute(cls, ascore, width, height, text, clip) -> io.NodeOutput: + tokens = clip.tokenize(text) + conditioning = clip.encode_from_tokens_scheduled( + tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height} + ) + return io.NodeOutput(conditioning) + + class CLIPTextEncodeSDXL(io.ComfyNode): @classmethod def define_schema(cls): @@ -48,32 +73,7 @@ class CLIPTextEncodeSDXL(io.ComfyNode): return io.NodeOutput(conditioning) -class CLIPTextEncodeSDXLRefiner(io.ComfyNode): - @classmethod - def define_schema(cls): - return io.Schema( - node_id="CLIPTextEncodeSDXLRefiner_V3", - category="advanced/conditioning", - inputs=[ - io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01), - io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), - io.Int.Input("height", default=1024, min=0, max=nodes.MAX_RESOLUTION), - io.String.Input("text", multiline=True, dynamic_prompts=True), - io.Clip.Input("clip"), - ], - outputs=[io.Conditioning.Output()], - ) - - @classmethod - def execute(cls, ascore, width, height, text, clip) -> io.NodeOutput: - tokens = clip.tokenize(text) - conditioning = clip.encode_from_tokens_scheduled( - tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height} - ) - return io.NodeOutput(conditioning) - - -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ CLIPTextEncodeSDXL, CLIPTextEncodeSDXLRefiner, ] diff --git 
a/comfy_extras/v3/nodes_compositing.py b/comfy_extras/v3/nodes_compositing.py index cfe195148..b1e59ec78 100644 --- a/comfy_extras/v3/nodes_compositing.py +++ b/comfy_extras/v3/nodes_compositing.py @@ -112,32 +112,6 @@ def porter_duff_composite( return out_image, out_alpha -class JoinImageWithAlpha(io.ComfyNode): - @classmethod - def define_schema(cls): - return io.Schema( - node_id="JoinImageWithAlpha_V3", - display_name="Join Image with Alpha _V3", - category="mask/compositing", - inputs=[ - io.Image.Input("image"), - io.Mask.Input("alpha"), - ], - outputs=[io.Image.Output()], - ) - - @classmethod - def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput: - batch_size = min(len(image), len(alpha)) - out_images = [] - - alpha = 1.0 - resize_mask(alpha, image.shape[1:]) - for i in range(batch_size): - out_images.append(torch.cat((image[i][:, :, :3], alpha[i].unsqueeze(2)), dim=2)) - - return io.NodeOutput(torch.stack(out_images)) - - class PorterDuffImageComposite(io.ComfyNode): @classmethod def define_schema(cls): @@ -219,7 +193,33 @@ class SplitImageWithAlpha(io.ComfyNode): return io.NodeOutput(torch.stack(out_images), 1.0 - torch.stack(out_alphas)) -NODES_LIST = [ +class JoinImageWithAlpha(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="JoinImageWithAlpha_V3", + display_name="Join Image with Alpha _V3", + category="mask/compositing", + inputs=[ + io.Image.Input("image"), + io.Mask.Input("alpha"), + ], + outputs=[io.Image.Output()], + ) + + @classmethod + def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput: + batch_size = min(len(image), len(alpha)) + out_images = [] + + alpha = 1.0 - resize_mask(alpha, image.shape[1:]) + for i in range(batch_size): + out_images.append(torch.cat((image[i][:, :, :3], alpha[i].unsqueeze(2)), dim=2)) + + return io.NodeOutput(torch.stack(out_images)) + + +NODES_LIST: list[type[io.ComfyNode]] = [ JoinImageWithAlpha, PorterDuffImageComposite, SplitImageWithAlpha, diff --git a/comfy_extras/v3/nodes_cond.py b/comfy_extras/v3/nodes_cond.py index 9d3181886..2ce343500 100644 --- a/comfy_extras/v3/nodes_cond.py +++ b/comfy_extras/v3/nodes_cond.py @@ -54,7 +54,7 @@ class T5TokenizerOptions(io.ComfyNode): return io.NodeOutput(clip) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ CLIPTextEncodeControlnet, T5TokenizerOptions, ] diff --git a/comfy_extras/v3/nodes_controlnet.py b/comfy_extras/v3/nodes_controlnet.py index 4788113a4..a4656fad2 100644 --- a/comfy_extras/v3/nodes_controlnet.py +++ b/comfy_extras/v3/nodes_controlnet.py @@ -3,6 +3,33 @@ from comfy.cldm.control_types import UNION_CONTROLNET_TYPES from comfy_api.latest import io +class SetUnionControlNetType(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SetUnionControlNetType_V3", + category="conditioning/controlnet", + inputs=[ + io.ControlNet.Input("control_net"), + io.Combo.Input("type", options=["auto"] + list(UNION_CONTROLNET_TYPES.keys())), + ], + outputs=[ + io.ControlNet.Output(), + ], + ) + + @classmethod + def execute(cls, control_net, type) -> io.NodeOutput: + control_net = control_net.copy() + type_number = UNION_CONTROLNET_TYPES.get(type, -1) + if type_number >= 0: + control_net.set_extra_arg("control_type", [type_number]) + else: + control_net.set_extra_arg("control_type", []) + + return io.NodeOutput(control_net) + + class ControlNetApplyAdvanced(io.ComfyNode): @classmethod def define_schema(cls): @@ -60,33 +87,6 @@ class ControlNetApplyAdvanced(io.ComfyNode): return 
io.NodeOutput(out[0], out[1]) -class SetUnionControlNetType(io.ComfyNode): - @classmethod - def define_schema(cls): - return io.Schema( - node_id="SetUnionControlNetType_V3", - category="conditioning/controlnet", - inputs=[ - io.ControlNet.Input("control_net"), - io.Combo.Input("type", options=["auto"] + list(UNION_CONTROLNET_TYPES.keys())), - ], - outputs=[ - io.ControlNet.Output(), - ], - ) - - @classmethod - def execute(cls, control_net, type) -> io.NodeOutput: - control_net = control_net.copy() - type_number = UNION_CONTROLNET_TYPES.get(type, -1) - if type_number >= 0: - control_net.set_extra_arg("control_type", [type_number]) - else: - control_net.set_extra_arg("control_type", []) - - return io.NodeOutput(control_net) - - class ControlNetInpaintingAliMamaApply(ControlNetApplyAdvanced): @classmethod def define_schema(cls): diff --git a/comfy_extras/v3/nodes_cosmos.py b/comfy_extras/v3/nodes_cosmos.py index 9779e0ffe..a32c192e8 100644 --- a/comfy_extras/v3/nodes_cosmos.py +++ b/comfy_extras/v3/nodes_cosmos.py @@ -9,6 +9,29 @@ import nodes from comfy_api.latest import io +class EmptyCosmosLatentVideo(io.ComfyNode): + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="EmptyCosmosLatentVideo_V3", + category="latent/video", + inputs=[ + io.Int.Input("width", default=1280, min=16, max=nodes.MAX_RESOLUTION, step=16), + io.Int.Input("height", default=704, min=16, max=nodes.MAX_RESOLUTION, step=16), + io.Int.Input("length", default=121, min=1, max=nodes.MAX_RESOLUTION, step=8), + io.Int.Input("batch_size", default=1, min=1, max=4096), + ], + outputs=[io.Latent.Output()], + ) + + @classmethod + def execute(cls, width, height, length, batch_size) -> io.NodeOutput: + latent = torch.zeros( + [batch_size, 16, ((length - 1) // 8) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device() + ) + return io.NodeOutput({"samples": latent}) + + def vae_encode_with_padding(vae, image, width, height, length, padding=0): pixels = comfy.utils.common_upscale(image[..., :3].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1) pixel_len = min(pixels.shape[0], length) @@ -116,30 +139,7 @@ class CosmosPredict2ImageToVideoLatent(io.ComfyNode): return io.NodeOutput(out_latent) -class EmptyCosmosLatentVideo(io.ComfyNode): - @classmethod - def define_schema(cls) -> io.Schema: - return io.Schema( - node_id="EmptyCosmosLatentVideo_V3", - category="latent/video", - inputs=[ - io.Int.Input("width", default=1280, min=16, max=nodes.MAX_RESOLUTION, step=16), - io.Int.Input("height", default=704, min=16, max=nodes.MAX_RESOLUTION, step=16), - io.Int.Input("length", default=121, min=1, max=nodes.MAX_RESOLUTION, step=8), - io.Int.Input("batch_size", default=1, min=1, max=4096), - ], - outputs=[io.Latent.Output()], - ) - - @classmethod - def execute(cls, width, height, length, batch_size) -> io.NodeOutput: - latent = torch.zeros( - [batch_size, 16, ((length - 1) // 8) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device() - ) - return io.NodeOutput({"samples": latent}) - - -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ CosmosImageToVideoLatent, CosmosPredict2ImageToVideoLatent, EmptyCosmosLatentVideo, diff --git a/comfy_extras/v3/nodes_custom_sampler.py b/comfy_extras/v3/nodes_custom_sampler.py new file mode 100644 index 000000000..dca18b6ad --- /dev/null +++ b/comfy_extras/v3/nodes_custom_sampler.py @@ -0,0 +1,1035 @@ +from __future__ import annotations + +import math + +import torch + +import comfy.sample +import 
comfy.samplers +import comfy.utils +import latent_preview +import node_helpers +from comfy.k_diffusion import sa_solver +from comfy.k_diffusion import sampling as k_diffusion_sampling +from comfy_api.latest import io + + +class BasicScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="BasicScheduler_V3", + category="sampling/custom_sampling/schedulers", + inputs=[ + io.Model.Input("model"), + io.Combo.Input("scheduler", options=comfy.samplers.SCHEDULER_NAMES), + io.Int.Input("steps", default=20, min=1, max=10000), + io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, model, scheduler, steps, denoise): + total_steps = steps + if denoise < 1.0: + if denoise <= 0.0: + return io.NodeOutput(torch.FloatTensor([])) + total_steps = int(steps/denoise) + + sigmas = comfy.samplers.calculate_sigmas(model.get_model_object("model_sampling"), scheduler, total_steps).cpu() + sigmas = sigmas[-(steps + 1):] + return io.NodeOutput(sigmas) + + +class KarrasScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="KarrasScheduler_V3", + category="sampling/custom_sampling/schedulers", + inputs=[ + io.Int.Input("steps", default=20, min=1, max=10000), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("rho", default=7.0, min=0.0, max=100.0, step=0.01, round=False), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, steps, sigma_max, sigma_min, rho): + sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, rho=rho) + return io.NodeOutput(sigmas) + + +class ExponentialScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="ExponentialScheduler_V3", + category="sampling/custom_sampling/schedulers", + inputs=[ + io.Int.Input("steps", default=20, min=1, max=10000), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, steps, sigma_max, sigma_min): + sigmas = k_diffusion_sampling.get_sigmas_exponential(n=steps, sigma_min=sigma_min, sigma_max=sigma_max) + return io.NodeOutput(sigmas) + + +class PolyexponentialScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="PolyexponentialScheduler_V3", + category="sampling/custom_sampling/schedulers", + inputs=[ + io.Int.Input("steps", default=20, min=1, max=10000), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("rho", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, steps, sigma_max, sigma_min, rho): + sigmas = k_diffusion_sampling.get_sigmas_polyexponential(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, rho=rho) + return io.NodeOutput(sigmas) + + +class LaplaceScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="LaplaceScheduler_V3", + 
category="sampling/custom_sampling/schedulers", + inputs=[ + io.Int.Input("steps", default=20, min=1, max=10000), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("mu", default=0.0, min=-10.0, max=10.0, step=0.1, round=False), + io.Float.Input("beta", default=0.5, min=0.0, max=10.0, step=0.1, round=False), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, steps, sigma_max, sigma_min, mu, beta): + sigmas = k_diffusion_sampling.get_sigmas_laplace(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, mu=mu, beta=beta) + return io.NodeOutput(sigmas) + + +class SDTurboScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SDTurboScheduler_V3", + category="sampling/custom_sampling/schedulers", + inputs=[ + io.Model.Input("model"), + io.Int.Input("steps", default=1, min=1, max=10), + io.Float.Input("denoise", default=1.0, min=0, max=1.0, step=0.01), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, model, steps, denoise): + start_step = 10 - int(10 * denoise) + timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps] + sigmas = model.get_model_object("model_sampling").sigma(timesteps) + sigmas = torch.cat([sigmas, sigmas.new_zeros([1])]) + return io.NodeOutput(sigmas) + + +class BetaSamplingScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="BetaSamplingScheduler_V3", + category="sampling/custom_sampling/schedulers", + inputs=[ + io.Model.Input("model"), + io.Int.Input("steps", default=20, min=1, max=10000), + io.Float.Input("alpha", default=0.6, min=0.0, max=50.0, step=0.01, round=False), + io.Float.Input("beta", default=0.6, min=0.0, max=50.0, step=0.01, round=False), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, model, steps, alpha, beta): + sigmas = comfy.samplers.beta_scheduler(model.get_model_object("model_sampling"), steps, alpha=alpha, beta=beta) + return io.NodeOutput(sigmas) + + +class VPScheduler(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="VPScheduler_V3", + category="sampling/custom_sampling/schedulers", + inputs=[ + io.Int.Input("steps", default=20, min=1, max=10000), + io.Float.Input("beta_d", default=19.9, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("beta_min", default=0.1, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("eps_s", default=0.001, min=0.0, max=1.0, step=0.0001, round=False), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, steps, beta_d, beta_min, eps_s): + sigmas = k_diffusion_sampling.get_sigmas_vp(n=steps, beta_d=beta_d, beta_min=beta_min, eps_s=eps_s) + return io.NodeOutput(sigmas) + + +class SplitSigmas(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SplitSigmas_V3", + category="sampling/custom_sampling/sigmas", + inputs=[ + io.Sigmas.Input("sigmas"), + io.Int.Input("step", default=0, min=0, max=10000), + ], + outputs=[ + io.Sigmas.Output(display_name="high_sigmas"), + io.Sigmas.Output(display_name="low_sigmas"), + ] + ) + + @classmethod + def execute(cls, sigmas, step): + sigmas1 = sigmas[:step + 1] + sigmas2 = sigmas[step:] + return io.NodeOutput(sigmas1, sigmas2) + + +class SplitSigmasDenoise(io.ComfyNode): + @classmethod + def 
define_schema(cls): + return io.Schema( + node_id="SplitSigmasDenoise_V3", + category="sampling/custom_sampling/sigmas", + inputs=[ + io.Sigmas.Input("sigmas"), + io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01), + ], + outputs=[ + io.Sigmas.Output(display_name="high_sigmas"), + io.Sigmas.Output(display_name="low_sigmas"), + ] + ) + + @classmethod + def execute(cls, sigmas, denoise): + steps = max(sigmas.shape[-1] - 1, 0) + total_steps = round(steps * denoise) + sigmas1 = sigmas[:-(total_steps)] + sigmas2 = sigmas[-(total_steps + 1):] + return io.NodeOutput(sigmas1, sigmas2) + + +class FlipSigmas(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="FlipSigmas_V3", + category="sampling/custom_sampling/sigmas", + inputs=[ + io.Sigmas.Input("sigmas"), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, sigmas): + if len(sigmas) == 0: + return io.NodeOutput(sigmas) + + sigmas = sigmas.flip(0) + if sigmas[0] == 0: + sigmas[0] = 0.0001 + return io.NodeOutput(sigmas) + + +class SetFirstSigma(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SetFirstSigma_V3", + category="sampling/custom_sampling/sigmas", + inputs=[ + io.Sigmas.Input("sigmas"), + io.Float.Input("sigma", default=136.0, min=0.0, max=20000.0, step=0.001, round=False), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, sigmas, sigma): + sigmas = sigmas.clone() + sigmas[0] = sigma + return io.NodeOutput(sigmas) + + +class ExtendIntermediateSigmas(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="ExtendIntermediateSigmas_V3", + category="sampling/custom_sampling/sigmas", + inputs=[ + io.Sigmas.Input("sigmas"), + io.Int.Input("steps", default=2, min=1, max=100), + io.Float.Input("start_at_sigma", default=-1.0, min=-1.0, max=20000.0, step=0.01, round=False), + io.Float.Input("end_at_sigma", default=12.0, min=0.0, max=20000.0, step=0.01, round=False), + io.Combo.Input("spacing", options=['linear', 'cosine', 'sine']), + ], + outputs=[ + io.Sigmas.Output(), + ] + ) + + @classmethod + def execute(cls, sigmas: torch.Tensor, steps: int, start_at_sigma: float, end_at_sigma: float, spacing: str): + if start_at_sigma < 0: + start_at_sigma = float("inf") + + interpolator = { + 'linear': lambda x: x, + 'cosine': lambda x: torch.sin(x*math.pi/2), + 'sine': lambda x: 1 - torch.cos(x*math.pi/2) + }[spacing] + + # linear space for our interpolation function + x = torch.linspace(0, 1, steps + 1, device=sigmas.device)[1:-1] + computed_spacing = interpolator(x) + + extended_sigmas = [] + for i in range(len(sigmas) - 1): + sigma_current = sigmas[i] + sigma_next = sigmas[i+1] + + extended_sigmas.append(sigma_current) + + if end_at_sigma <= sigma_current <= start_at_sigma: + interpolated_steps = computed_spacing * (sigma_next - sigma_current) + sigma_current + extended_sigmas.extend(interpolated_steps.tolist()) + + # Add the last sigma value + if len(sigmas) > 0: + extended_sigmas.append(sigmas[-1]) + + extended_sigmas = torch.FloatTensor(extended_sigmas) + + return io.NodeOutput(extended_sigmas) + + +class SamplingPercentToSigma(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplingPercentToSigma_V3", + category="sampling/custom_sampling/sigmas", + inputs=[ + io.Model.Input("model"), + io.Float.Input("sampling_percent", default=0.0, min=0.0, max=1.0, step=0.0001), + io.Boolean.Input("return_actual_sigma", default=False, 
tooltip="Return the actual sigma value instead of the value used for interval checks.\nThis only affects results at 0.0 and 1.0."), + ], + outputs=[ + io.Float.Output(display_name="sigma_value"), + ] + ) + + @classmethod + def execute(cls, model, sampling_percent, return_actual_sigma): + model_sampling = model.get_model_object("model_sampling") + sigma_val = model_sampling.percent_to_sigma(sampling_percent) + if return_actual_sigma: + if sampling_percent == 0.0: + sigma_val = model_sampling.sigma_max.item() + elif sampling_percent == 1.0: + sigma_val = model_sampling.sigma_min.item() + return io.NodeOutput(sigma_val) + + +class KSamplerSelect(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="KSamplerSelect_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Combo.Input("sampler_name", options=comfy.samplers.SAMPLER_NAMES), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, sampler_name): + sampler = comfy.samplers.sampler_object(sampler_name) + return io.NodeOutput(sampler) + + +class SamplerDPMPP_3M_SDE(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerDPMPP_3M_SDE_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Combo.Input("noise_device", options=['gpu', 'cpu']), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, eta, s_noise, noise_device): + if noise_device == 'cpu': + sampler_name = "dpmpp_3m_sde" + else: + sampler_name = "dpmpp_3m_sde_gpu" + sampler = comfy.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise}) + return io.NodeOutput(sampler) + + +class SamplerDPMPP_2M_SDE(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerDPMPP_2M_SDE_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Combo.Input("solver_type", options=['midpoint', 'heun']), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Combo.Input("noise_device", options=['gpu', 'cpu']), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, solver_type, eta, s_noise, noise_device): + if noise_device == 'cpu': + sampler_name = "dpmpp_2m_sde" + else: + sampler_name = "dpmpp_2m_sde_gpu" + sampler = comfy.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "solver_type": solver_type}) + return io.NodeOutput(sampler) + + +class SamplerDPMPP_SDE(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerDPMPP_SDE_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("r", default=0.5, min=0.0, max=100.0, step=0.01, round=False), + io.Combo.Input("noise_device", options=['gpu', 'cpu']), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, eta, s_noise, r, noise_device): + if noise_device == 'cpu': + sampler_name = "dpmpp_sde" + else: + sampler_name = "dpmpp_sde_gpu" + sampler = comfy.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "r": r}) + return io.NodeOutput(sampler) + + +class 
SamplerDPMPP_2S_Ancestral(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerDPMPP_2S_Ancestral_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, eta, s_noise): + sampler = comfy.samplers.ksampler("dpmpp_2s_ancestral", {"eta": eta, "s_noise": s_noise}) + return io.NodeOutput(sampler) + + +class SamplerEulerAncestral(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerEulerAncestral_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, eta, s_noise): + sampler = comfy.samplers.ksampler("euler_ancestral", {"eta": eta, "s_noise": s_noise}) + return io.NodeOutput(sampler) + + +class SamplerEulerAncestralCFGPP(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerEulerAncestralCFGPP_V3", + display_name="SamplerEulerAncestralCFG++ _V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Float.Input("eta", default=1.0, min=0.0, max=1.0, step=0.01, round=False), + io.Float.Input("s_noise", default=1.0, min=0.0, max=10.0, step=0.01, round=False), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, eta, s_noise): + sampler = comfy.samplers.ksampler( + "euler_ancestral_cfg_pp", + {"eta": eta, "s_noise": s_noise}) + return io.NodeOutput(sampler) + + +class SamplerLMS(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerLMS_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Int.Input("order", default=4, min=1, max=100), + ], + outputs=[ + io.Sampler.Output() + ] + ) + + @classmethod + def execute(cls, order): + sampler = comfy.samplers.ksampler("lms", {"order": order}) + return io.NodeOutput(sampler) + + +class SamplerDPMAdaptative(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerDPMAdaptative_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Int.Input("order", default=3, min=2, max=3), + io.Float.Input("rtol", default=0.05, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("atol", default=0.0078, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("h_init", default=0.05, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("pcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("icoeff", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("dcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("accept_safety", default=0.81, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("eta", default=0.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, order, rtol, atol, h_init, pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise): + sampler = comfy.samplers.ksampler("dpm_adaptive", {"order": order, "rtol": rtol, "atol": 
atol, "h_init": h_init, "pcoeff": pcoeff, + "icoeff": icoeff, "dcoeff": dcoeff, "accept_safety": accept_safety, "eta": eta, + "s_noise":s_noise }) + return io.NodeOutput(sampler) + + +class SamplerER_SDE(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerER_SDE_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Combo.Input("solver_type", options=["ER-SDE", "Reverse-time SDE", "ODE"]), + io.Int.Input("max_stage", default=3, min=1, max=3), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength of reverse-time SDE.\nWhen eta=0, it reduces to deterministic ODE. This setting doesn't apply to ER-SDE solver type."), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, solver_type, max_stage, eta, s_noise): + if solver_type == "ODE" or (solver_type == "Reverse-time SDE" and eta == 0): + eta = 0 + s_noise = 0 + + def reverse_time_sde_noise_scaler(x): + return x ** (eta + 1) + + if solver_type == "ER-SDE": + # Use the default one in sample_er_sde() + noise_scaler = None + else: + noise_scaler = reverse_time_sde_noise_scaler + + sampler_name = "er_sde" + sampler = comfy.samplers.ksampler(sampler_name, {"s_noise": s_noise, "noise_scaler": noise_scaler, "max_stage": max_stage}) + return io.NodeOutput(sampler) + + +class SamplerSASolver(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerSASolver_V3", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Model.Input("model"), + io.Float.Input("eta", default=1.0, min=0.0, max=10.0, step=0.01, round=False), + io.Float.Input("sde_start_percent", default=0.2, min=0.0, max=1.0, step=0.001), + io.Float.Input("sde_end_percent", default=0.8, min=0.0, max=1.0, step=0.001), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Int.Input("predictor_order", default=3, min=1, max=6), + io.Int.Input("corrector_order", default=4, min=0, max=6), + io.Boolean.Input("use_pece"), + io.Boolean.Input("simple_order_2"), + ], + outputs=[ + io.Sampler.Output(), + ] + ) + + @classmethod + def execute(cls, model, eta, sde_start_percent, sde_end_percent, s_noise, predictor_order, corrector_order, use_pece, simple_order_2): + model_sampling = model.get_model_object("model_sampling") + start_sigma = model_sampling.percent_to_sigma(sde_start_percent) + end_sigma = model_sampling.percent_to_sigma(sde_end_percent) + tau_func = sa_solver.get_tau_interval_func(start_sigma, end_sigma, eta=eta) + + sampler_name = "sa_solver" + sampler = comfy.samplers.ksampler( + sampler_name, + { + "tau_func": tau_func, + "s_noise": s_noise, + "predictor_order": predictor_order, + "corrector_order": corrector_order, + "use_pece": use_pece, + "simple_order_2": simple_order_2, + }, + ) + return io.NodeOutput(sampler) + + +class Noise_EmptyNoise: + def __init__(self): + self.seed = 0 + + def generate_noise(self, input_latent): + latent_image = input_latent["samples"] + return torch.zeros(latent_image.shape, dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") + + +class Noise_RandomNoise: + def __init__(self, seed): + self.seed = seed + + def generate_noise(self, input_latent): + latent_image = input_latent["samples"] + batch_inds = input_latent["batch_index"] if "batch_index" in input_latent else None + return comfy.sample.prepare_noise(latent_image, self.seed, batch_inds) + 
+ +class SamplerCustom(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerCustom_V3", + category="sampling/custom_sampling", + inputs=[ + io.Model.Input("model"), + io.Boolean.Input("add_noise", default=True), + io.Int.Input("noise_seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True), + io.Float.Input("cfg", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01), + io.Conditioning.Input("positive"), + io.Conditioning.Input("negative"), + io.Sampler.Input("sampler"), + io.Sigmas.Input("sigmas"), + io.Latent.Input("latent_image"), + ], + outputs=[ + io.Latent.Output(display_name="output"), + io.Latent.Output(display_name="denoised_output"), + ] + ) + + @classmethod + def execute(cls, model, add_noise, noise_seed, cfg, positive, negative, sampler, sigmas, latent_image): + latent = latent_image + latent_image = latent["samples"] + latent = latent.copy() + latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image) + latent["samples"] = latent_image + + if not add_noise: + noise = Noise_EmptyNoise().generate_noise(latent) + else: + noise = Noise_RandomNoise(noise_seed).generate_noise(latent) + + noise_mask = None + if "noise_mask" in latent: + noise_mask = latent["noise_mask"] + + x0_output = {} + callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output) + + disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED + samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed) + + out = latent.copy() + out["samples"] = samples + if "x0" in x0_output: + out_denoised = latent.copy() + out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu()) + else: + out_denoised = out + return io.NodeOutput(out, out_denoised) + + +class Guider_Basic(comfy.samplers.CFGGuider): + def set_conds(self, positive): + self.inner_set_conds({"positive": positive}) + + +class BasicGuider(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="BasicGuider_V3", + category="sampling/custom_sampling/guiders", + inputs=[ + io.Model.Input("model"), + io.Conditioning.Input("conditioning"), + ], + outputs=[ + io.Guider.Output(), + ] + ) + + @classmethod + def execute(cls, model, conditioning): + guider = Guider_Basic(model) + guider.set_conds(conditioning) + return io.NodeOutput(guider) + + +class CFGGuider(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="CFGGuider_V3", + category="sampling/custom_sampling/guiders", + inputs=[ + io.Model.Input("model"), + io.Conditioning.Input("positive"), + io.Conditioning.Input("negative"), + io.Float.Input("cfg", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01), + ], + outputs=[ + io.Guider.Output(), + ] + ) + + @classmethod + def execute(cls, model, positive, negative, cfg): + guider = comfy.samplers.CFGGuider(model) + guider.set_conds(positive, negative) + guider.set_cfg(cfg) + return io.NodeOutput(guider) + + +class Guider_DualCFG(comfy.samplers.CFGGuider): + def set_cfg(self, cfg1, cfg2, nested=False): + self.cfg1 = cfg1 + self.cfg2 = cfg2 + self.nested = nested + + def set_conds(self, positive, middle, negative): + middle = node_helpers.conditioning_set_values(middle, {"prompt_type": "negative"}) + self.inner_set_conds({"positive": positive, "middle": middle, "negative": negative}) + + def predict_noise(self, x, timestep, model_options={}, seed=None): + 
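+        # set_conds() stored positive/middle/negative. In "nested" style, cfg1
+        # guides positive against middle and cfg2 then guides that result
+        # against negative; in "regular" style, cfg2 guides middle against
+        # negative and cfg1 adds the (positive - middle) delta on top.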
negative_cond = self.conds.get("negative", None) + middle_cond = self.conds.get("middle", None) + positive_cond = self.conds.get("positive", None) + + if self.nested: + out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options) + pred_text = comfy.samplers.cfg_function(self.inner_model, out[2], out[1], self.cfg1, x, timestep, model_options=model_options, cond=positive_cond, uncond=middle_cond) + return out[0] + self.cfg2 * (pred_text - out[0]) + else: + if model_options.get("disable_cfg1_optimization", False) is False: + if math.isclose(self.cfg2, 1.0): + negative_cond = None + if math.isclose(self.cfg1, 1.0): + middle_cond = None + + out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options) + return comfy.samplers.cfg_function(self.inner_model, out[1], out[0], self.cfg2, x, timestep, model_options=model_options, cond=middle_cond, uncond=negative_cond) + (out[2] - out[1]) * self.cfg1 + + +class DualCFGGuider(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="DualCFGGuider_V3", + category="sampling/custom_sampling/guiders", + inputs=[ + io.Model.Input("model"), + io.Conditioning.Input("cond1"), + io.Conditioning.Input("cond2"), + io.Conditioning.Input("negative"), + io.Float.Input("cfg_conds", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01), + io.Float.Input("cfg_cond2_negative", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01), + io.Combo.Input("style", options=["regular", "nested"]), + ], + outputs=[ + io.Guider.Output(), + ] + ) + + @classmethod + def execute(cls, model, cond1, cond2, negative, cfg_conds, cfg_cond2_negative, style): + guider = Guider_DualCFG(model) + guider.set_conds(cond1, cond2, negative) + guider.set_cfg(cfg_conds, cfg_cond2_negative, nested=(style == "nested")) + return io.NodeOutput(guider) + + +class DisableNoise(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="DisableNoise_V3", + category="sampling/custom_sampling/noise", + inputs=[], + outputs=[ + io.Noise.Output(), + ] + ) + + @classmethod + def execute(cls): + return io.NodeOutput(Noise_EmptyNoise()) + + +class RandomNoise(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="RandomNoise_V3", + category="sampling/custom_sampling/noise", + inputs=[ + io.Int.Input("noise_seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True), + ], + outputs=[ + io.Noise.Output(), + ] + ) + + @classmethod + def execute(cls, noise_seed): + return io.NodeOutput(Noise_RandomNoise(noise_seed)) + + +class SamplerCustomAdvanced(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerCustomAdvanced_V3", + category="sampling/custom_sampling", + inputs=[ + io.Noise.Input("noise"), + io.Guider.Input("guider"), + io.Sampler.Input("sampler"), + io.Sigmas.Input("sigmas"), + io.Latent.Input("latent_image"), + ], + outputs=[ + io.Latent.Output(display_name="output"), + io.Latent.Output(display_name="denoised_output"), + ] + ) + + @classmethod + def execute(cls, noise, guider, sampler, sigmas, latent_image): + latent = latent_image + latent_image = latent["samples"] + latent = latent.copy() + latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image) + latent["samples"] = latent_image + + noise_mask = None + if "noise_mask" in latent: + noise_mask = latent["noise_mask"] + + x0_output = {} + callback = 
latent_preview.prepare_callback(guider.model_patcher, sigmas.shape[-1] - 1, x0_output) + + disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED + samples = guider.sample(noise.generate_noise(latent), latent_image, sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise.seed) + samples = samples.to(comfy.model_management.intermediate_device()) + + out = latent.copy() + out["samples"] = samples + if "x0" in x0_output: + out_denoised = latent.copy() + out_denoised["samples"] = guider.model_patcher.model.process_latent_out(x0_output["x0"].cpu()) + else: + out_denoised = out + return io.NodeOutput(out, out_denoised) + + +class AddNoise(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="AddNoise_V3", + category="_for_testing/custom_sampling/noise", + is_experimental=True, + inputs=[ + io.Model.Input("model"), + io.Noise.Input("noise"), + io.Sigmas.Input("sigmas"), + io.Latent.Input("latent_image"), + ], + outputs=[ + io.Latent.Output(), + ] + ) + + @classmethod + def execute(cls, model, noise, sigmas, latent_image): + if len(sigmas) == 0: + return io.NodeOutput(latent_image) + + latent = latent_image + latent_image = latent["samples"] + + noisy = noise.generate_noise(latent) + + model_sampling = model.get_model_object("model_sampling") + process_latent_out = model.get_model_object("process_latent_out") + process_latent_in = model.get_model_object("process_latent_in") + + if len(sigmas) > 1: + scale = torch.abs(sigmas[0] - sigmas[-1]) + else: + scale = sigmas[0] + + if torch.count_nonzero(latent_image) > 0: #Don't shift the empty latent image. + latent_image = process_latent_in(latent_image) + noisy = model_sampling.noise_scaling(scale, noisy, latent_image) + noisy = process_latent_out(noisy) + noisy = torch.nan_to_num(noisy, nan=0.0, posinf=0.0, neginf=0.0) + + out = latent.copy() + out["samples"] = noisy + return io.NodeOutput(out) + + +NODES_LIST: list[type[io.ComfyNode]] = [ + AddNoise, + BasicGuider, + BasicScheduler, + BetaSamplingScheduler, + CFGGuider, + DisableNoise, + DualCFGGuider, + ExponentialScheduler, + ExtendIntermediateSigmas, + FlipSigmas, + KarrasScheduler, + KSamplerSelect, + LaplaceScheduler, + PolyexponentialScheduler, + RandomNoise, + SamplerCustom, + SamplerCustomAdvanced, + SamplerDPMAdaptative, + SamplerDPMPP_2M_SDE, + SamplerDPMPP_2S_Ancestral, + SamplerDPMPP_3M_SDE, + SamplerDPMPP_SDE, + SamplerER_SDE, + SamplerEulerAncestral, + SamplerEulerAncestralCFGPP, + SamplerLMS, + SamplerSASolver, + SamplingPercentToSigma, + SDTurboScheduler, + SetFirstSigma, + SplitSigmas, + SplitSigmasDenoise, + VPScheduler, +] diff --git a/comfy_extras/v3/nodes_differential_diffusion.py b/comfy_extras/v3/nodes_differential_diffusion.py index 6eb8cacbc..b4e5ecdc5 100644 --- a/comfy_extras/v3/nodes_differential_diffusion.py +++ b/comfy_extras/v3/nodes_differential_diffusion.py @@ -45,6 +45,6 @@ class DifferentialDiffusion(io.ComfyNode): return (denoise_mask >= threshold).to(denoise_mask.dtype) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ DifferentialDiffusion, ] diff --git a/comfy_extras/v3/nodes_edit_model.py b/comfy_extras/v3/nodes_edit_model.py index 79dd672e3..b6164dc6a 100644 --- a/comfy_extras/v3/nodes_edit_model.py +++ b/comfy_extras/v3/nodes_edit_model.py @@ -29,6 +29,6 @@ class ReferenceLatent(io.ComfyNode): return io.NodeOutput(conditioning) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ ReferenceLatent, ] diff --git a/comfy_extras/v3/nodes_flux.py b/comfy_extras/v3/nodes_flux.py 
index 3967fc4ad..f068f7b98 100644 --- a/comfy_extras/v3/nodes_flux.py +++ b/comfy_extras/v3/nodes_flux.py @@ -49,28 +49,6 @@ class CLIPTextEncodeFlux(io.ComfyNode): return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"guidance": guidance})) - -class FluxDisableGuidance(io.ComfyNode): - @classmethod - def define_schema(cls): - return io.Schema( - node_id="FluxDisableGuidance_V3", - category="advanced/conditioning/flux", - description="This node completely disables the guidance embed on Flux and Flux like models", - inputs=[ - io.Conditioning.Input("conditioning"), - ], - outputs=[ - io.Conditioning.Output(), - ], - ) - - @classmethod - def execute(cls, conditioning): - c = node_helpers.conditioning_set_values(conditioning, {"guidance": None}) - return io.NodeOutput(c) - - class FluxGuidance(io.ComfyNode): @classmethod def define_schema(cls): @@ -91,6 +69,25 @@ class FluxGuidance(io.ComfyNode): c = node_helpers.conditioning_set_values(conditioning, {"guidance": guidance}) return io.NodeOutput(c) +class FluxDisableGuidance(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="FluxDisableGuidance_V3", + category="advanced/conditioning/flux", + description="This node completely disables the guidance embed on Flux and Flux like models", + inputs=[ + io.Conditioning.Input("conditioning"), + ], + outputs=[ + io.Conditioning.Output(), + ], + ) + + @classmethod + def execute(cls, conditioning): + c = node_helpers.conditioning_set_values(conditioning, {"guidance": None}) + return io.NodeOutput(c) class FluxKontextImageScale(io.ComfyNode): @classmethod @@ -117,7 +114,7 @@ class FluxKontextImageScale(io.ComfyNode): return io.NodeOutput(image) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ CLIPTextEncodeFlux, FluxDisableGuidance, FluxGuidance, diff --git a/comfy_extras/v3/nodes_freelunch.py b/comfy_extras/v3/nodes_freelunch.py index fe3e2c9dd..7467a1f88 100644 --- a/comfy_extras/v3/nodes_freelunch.py +++ b/comfy_extras/v3/nodes_freelunch.py @@ -125,7 +125,7 @@ class FreeU_V2(io.ComfyNode): return io.NodeOutput(m) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ FreeU, FreeU_V2, ] diff --git a/comfy_extras/v3/nodes_fresca.py b/comfy_extras/v3/nodes_fresca.py index e9057fca5..c4115c84c 100644 --- a/comfy_extras/v3/nodes_fresca.py +++ b/comfy_extras/v3/nodes_fresca.py @@ -105,6 +105,6 @@ class FreSca(io.ComfyNode): return io.NodeOutput(m) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ FreSca, ] diff --git a/comfy_extras/v3/nodes_gits.py b/comfy_extras/v3/nodes_gits.py index 2efb34763..4d500d789 100644 --- a/comfy_extras/v3/nodes_gits.py +++ b/comfy_extras/v3/nodes_gits.py @@ -371,6 +371,6 @@ class GITSScheduler(io.ComfyNode): return io.NodeOutput(torch.FloatTensor(sigmas)) -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ GITSScheduler, ] diff --git a/comfy_extras/v3/nodes_hidream.py b/comfy_extras/v3/nodes_hidream.py index 8afd3bb13..a7c733774 100644 --- a/comfy_extras/v3/nodes_hidream.py +++ b/comfy_extras/v3/nodes_hidream.py @@ -6,33 +6,6 @@ import folder_paths from comfy_api.latest import io -class CLIPTextEncodeHiDream(io.ComfyNode): - @classmethod - def define_schema(cls): - return io.Schema( - node_id="CLIPTextEncodeHiDream_V3", - category="advanced/conditioning", - inputs=[ - io.Clip.Input("clip"), - io.String.Input("clip_l", multiline=True, dynamic_prompts=True), - io.String.Input("clip_g", multiline=True, dynamic_prompts=True), - io.String.Input("t5xxl", multiline=True, dynamic_prompts=True), - 
io.String.Input("llama", multiline=True, dynamic_prompts=True), - ], - outputs=[ - io.Conditioning.Output(), - ] - ) - - @classmethod - def execute(cls, clip, clip_l, clip_g, t5xxl, llama): - tokens = clip.tokenize(clip_g) - tokens["l"] = clip.tokenize(clip_l)["l"] - tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"] - tokens["llama"] = clip.tokenize(llama)["llama"] - return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens)) - - class QuadrupleCLIPLoader(io.ComfyNode): @classmethod def define_schema(cls): @@ -65,7 +38,34 @@ class QuadrupleCLIPLoader(io.ComfyNode): ) -NODES_LIST = [ +class CLIPTextEncodeHiDream(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="CLIPTextEncodeHiDream_V3", + category="advanced/conditioning", + inputs=[ + io.Clip.Input("clip"), + io.String.Input("clip_l", multiline=True, dynamic_prompts=True), + io.String.Input("clip_g", multiline=True, dynamic_prompts=True), + io.String.Input("t5xxl", multiline=True, dynamic_prompts=True), + io.String.Input("llama", multiline=True, dynamic_prompts=True), + ], + outputs=[ + io.Conditioning.Output(), + ] + ) + + @classmethod + def execute(cls, clip, clip_l, clip_g, t5xxl, llama): + tokens = clip.tokenize(clip_g) + tokens["l"] = clip.tokenize(clip_l)["l"] + tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"] + tokens["llama"] = clip.tokenize(llama)["llama"] + return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens)) + + +NODES_LIST: list[type[io.ComfyNode]] = [ CLIPTextEncodeHiDream, QuadrupleCLIPLoader, ] diff --git a/comfy_extras/v3/nodes_hunyuan.py b/comfy_extras/v3/nodes_hunyuan.py index 1c2262a0e..4ad737d7b 100644 --- a/comfy_extras/v3/nodes_hunyuan.py +++ b/comfy_extras/v3/nodes_hunyuan.py @@ -7,16 +7,6 @@ import node_helpers import nodes from comfy_api.latest import io -PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = ( - "<|start_header_id|>system<|end_header_id|>\n\n\nDescribe the video by detailing the following aspects according to the reference image: " - "1. The main content and theme of the video." - "2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects." - "3. Actions, events, behaviors temporal relationships, physical movement changes of the objects." - "4. background environment, light, style and atmosphere." - "5. camera angles, movements, and transitions used in the video:<|eot_id|>\n\n" - "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>\n\n" -) class CLIPTextEncodeHunyuanDiT(io.ComfyNode): @classmethod @@ -68,6 +58,51 @@ class EmptyHunyuanLatentVideo(io.ComfyNode): return io.NodeOutput({"samples":latent}) +PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = ( + "<|start_header_id|>system<|end_header_id|>\n\n\nDescribe the video by detailing the following aspects according to the reference image: " + "1. The main content and theme of the video." + "2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects." + "3. Actions, events, behaviors temporal relationships, physical movement changes of the objects." + "4. background environment, light, style and atmosphere." + "5. 
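# --- editor's note (not part of the patch): CLIPTextEncodeHiDream, moved below
# QuadrupleCLIPLoader by this hunk, builds one token dict from four prompts.
# A sketch with a hypothetical tokenizer stub showing the merge shape; the real
# clip.tokenize returns a dict keyed by encoder name ("g", "l", "t5xxl", "llama").
def tokenize_stub(text):
    return {"g": [text], "l": [text], "t5xxl": [text], "llama": [text]}

tokens = tokenize_stub("a photo of a cat")           # clip_g supplies the base dict
tokens["l"] = tokenize_stub("sharp focus")["l"]      # then each stream is overlaid
tokens["t5xxl"] = tokenize_stub("a cat")["t5xxl"]
tokens["llama"] = tokenize_stub("a cat")["llama"]
# --- end editor's note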
camera angles, movements, and transitions used in the video:<|eot_id|>\n\n" + "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\n" +) + + +class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="TextEncodeHunyuanVideo_ImageToVideo_V3", + category="advanced/conditioning", + inputs=[ + io.Clip.Input("clip"), + io.ClipVisionOutput.Input("clip_vision_output"), + io.String.Input("prompt", multiline=True, dynamic_prompts=True), + io.Int.Input( + "image_interleave", + default=2, + min=1, + max=512, + tooltip="How much the image influences things vs the text prompt. Higher number means more influence from the text prompt.", + ), + ], + outputs=[ + io.Conditioning.Output(), + ], + ) + + @classmethod + def execute(cls, clip, clip_vision_output, prompt, image_interleave): + tokens = clip.tokenize( + prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, + image_embeds=clip_vision_output.mm_projected, + image_interleave=image_interleave, + ) + return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens)) + + class HunyuanImageToVideo(io.ComfyNode): @classmethod def define_schema(cls): @@ -126,40 +161,7 @@ class HunyuanImageToVideo(io.ComfyNode): return io.NodeOutput(positive, out_latent) -class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode): - @classmethod - def define_schema(cls): - return io.Schema( - node_id="TextEncodeHunyuanVideo_ImageToVideo_V3", - category="advanced/conditioning", - inputs=[ - io.Clip.Input("clip"), - io.ClipVisionOutput.Input("clip_vision_output"), - io.String.Input("prompt", multiline=True, dynamic_prompts=True), - io.Int.Input( - "image_interleave", - default=2, - min=1, - max=512, - tooltip="How much the image influences things vs the text prompt. 
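# --- editor's note (not part of the patch): the template above is consumed by
# TextEncodeHunyuanVideo_ImageToVideo via clip.tokenize(prompt, llama_template=...);
# its "{}" slot receives the user prompt. Sketch with a shortened stand-in template:
TEMPLATE_STUB = (
    "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
)
filled = TEMPLATE_STUB.format("a red fox running through snow")
assert "user" in filled and "assistant" in filled
# image_embeds (clip_vision_output.mm_projected) and image_interleave then control
# how the projected vision tokens are woven between these text tokens.
# --- end editor's note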
Higher number means more influence from the text prompt.", - ), - ], - outputs=[ - io.Conditioning.Output(), - ], - ) - - @classmethod - def execute(cls, clip, clip_vision_output, prompt, image_interleave): - tokens = clip.tokenize( - prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, - image_embeds=clip_vision_output.mm_projected, - image_interleave=image_interleave, - ) - return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens)) - - -NODES_LIST = [ +NODES_LIST: list[type[io.ComfyNode]] = [ CLIPTextEncodeHunyuanDiT, EmptyHunyuanLatentVideo, HunyuanImageToVideo, diff --git a/comfy_extras/v3/nodes_hunyuan3d.py b/comfy_extras/v3/nodes_hunyuan3d.py new file mode 100644 index 000000000..a4594c4c2 --- /dev/null +++ b/comfy_extras/v3/nodes_hunyuan3d.py @@ -0,0 +1,672 @@ +from __future__ import annotations + +import json +import os +import struct + +import numpy as np +import torch + +import comfy.model_management +import folder_paths +from comfy.cli_args import args +from comfy.ldm.modules.diffusionmodules.mmdit import ( + get_1d_sincos_pos_embed_from_grid_torch, +) +from comfy_api.latest import io + + +class VOXEL: + def __init__(self, data): + self.data = data + + +class MESH: + def __init__(self, vertices, faces): + self.vertices = vertices + self.faces = faces + + +def voxel_to_mesh(voxels, threshold=0.5, device=None): + if device is None: + device = torch.device("cpu") + voxels = voxels.to(device) + + binary = (voxels > threshold).float() + padded = torch.nn.functional.pad(binary, (1, 1, 1, 1, 1, 1), 'constant', 0) + + D, H, W = binary.shape + + neighbors = torch.tensor([ + [0, 0, 1], + [0, 0, -1], + [0, 1, 0], + [0, -1, 0], + [1, 0, 0], + [-1, 0, 0] + ], device=device) + + z, y, x = torch.meshgrid( + torch.arange(D, device=device), + torch.arange(H, device=device), + torch.arange(W, device=device), + indexing='ij' + ) + voxel_indices = torch.stack([z.flatten(), y.flatten(), x.flatten()], dim=1) + + solid_mask = binary.flatten() > 0 + solid_indices = voxel_indices[solid_mask] + + corner_offsets = [ + torch.tensor([ + [0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1] + ], device=device), + torch.tensor([ + [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0] + ], device=device), + torch.tensor([ + [0, 1, 0], [1, 1, 0], [1, 1, 1], [0, 1, 1] + ], device=device), + torch.tensor([ + [0, 0, 0], [0, 0, 1], [1, 0, 1], [1, 0, 0] + ], device=device), + torch.tensor([ + [1, 0, 1], [1, 1, 1], [1, 1, 0], [1, 0, 0] + ], device=device), + torch.tensor([ + [0, 1, 0], [0, 1, 1], [0, 0, 1], [0, 0, 0] + ], device=device) + ] + + all_vertices = [] + all_indices = [] + + vertex_count = 0 + + for face_idx, offset in enumerate(neighbors): + neighbor_indices = solid_indices + offset + + padded_indices = neighbor_indices + 1 + + is_exposed = padded[ + padded_indices[:, 0], + padded_indices[:, 1], + padded_indices[:, 2] + ] == 0 + + if not is_exposed.any(): + continue + + exposed_indices = solid_indices[is_exposed] + + corners = corner_offsets[face_idx].unsqueeze(0) + + face_vertices = exposed_indices.unsqueeze(1) + corners + + all_vertices.append(face_vertices.reshape(-1, 3)) + + num_faces = exposed_indices.shape[0] + face_indices = torch.arange( + vertex_count, + vertex_count + 4 * num_faces, + device=device + ).reshape(-1, 4) + + all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 1], face_indices[:, 2]], dim=1)) + all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 2], face_indices[:, 3]], dim=1)) + + vertex_count += 4 * num_faces + + if len(all_vertices) > 0: + vertices = 
torch.cat(all_vertices, dim=0) + faces = torch.cat(all_indices, dim=0) + else: + vertices = torch.zeros((1, 3)) + faces = torch.zeros((1, 3)) + + v_min = 0 + v_max = max(voxels.shape) + + vertices = vertices - (v_min + v_max) / 2 + + scale = (v_max - v_min) / 2 + if scale > 0: + vertices = vertices / scale + + vertices = torch.fliplr(vertices) + return vertices, faces + +def voxel_to_mesh_surfnet(voxels, threshold=0.5, device=None): + if device is None: + device = torch.device("cpu") + voxels = voxels.to(device) + + D, H, W = voxels.shape + + padded = torch.nn.functional.pad(voxels, (1, 1, 1, 1, 1, 1), 'constant', 0) + z, y, x = torch.meshgrid( + torch.arange(D, device=device), + torch.arange(H, device=device), + torch.arange(W, device=device), + indexing='ij' + ) + cell_positions = torch.stack([z.flatten(), y.flatten(), x.flatten()], dim=1) + + corner_offsets = torch.tensor([ + [0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0], + [0, 0, 1], [1, 0, 1], [0, 1, 1], [1, 1, 1] + ], device=device) + + corner_values = torch.zeros((cell_positions.shape[0], 8), device=device) + for c, (dz, dy, dx) in enumerate(corner_offsets): + corner_values[:, c] = padded[ + cell_positions[:, 0] + dz, + cell_positions[:, 1] + dy, + cell_positions[:, 2] + dx + ] + + corner_signs = corner_values > threshold + has_inside = torch.any(corner_signs, dim=1) + has_outside = torch.any(~corner_signs, dim=1) + contains_surface = has_inside & has_outside + + active_cells = cell_positions[contains_surface] + active_signs = corner_signs[contains_surface] + active_values = corner_values[contains_surface] + + if active_cells.shape[0] == 0: + return torch.zeros((0, 3), device=device), torch.zeros((0, 3), dtype=torch.long, device=device) + + edges = torch.tensor([ + [0, 1], [0, 2], [0, 4], [1, 3], + [1, 5], [2, 3], [2, 6], [3, 7], + [4, 5], [4, 6], [5, 7], [6, 7] + ], device=device) + + cell_vertices = {} + progress = comfy.utils.ProgressBar(100) + + for edge_idx, (e1, e2) in enumerate(edges): + progress.update(1) + crossing = active_signs[:, e1] != active_signs[:, e2] + if not crossing.any(): + continue + + cell_indices = torch.nonzero(crossing, as_tuple=True)[0] + + v1 = active_values[cell_indices, e1] + v2 = active_values[cell_indices, e2] + + t = torch.zeros_like(v1, device=device) + denom = v2 - v1 + valid = denom != 0 + t[valid] = (threshold - v1[valid]) / denom[valid] + t[~valid] = 0.5 + + p1 = corner_offsets[e1].float() + p2 = corner_offsets[e2].float() + + intersection = p1.unsqueeze(0) + t.unsqueeze(1) * (p2.unsqueeze(0) - p1.unsqueeze(0)) + + for i, point in zip(cell_indices.tolist(), intersection): + if i not in cell_vertices: + cell_vertices[i] = [] + cell_vertices[i].append(point) + + # Calculate the final vertices as the average of intersection points for each cell + vertices = [] + vertex_lookup = {} + + vert_progress_mod = round(len(cell_vertices)/50) + + for i, points in cell_vertices.items(): + if not i % vert_progress_mod: + progress.update(1) + + if points: + vertex = torch.stack(points).mean(dim=0) + vertex = vertex + active_cells[i].float() + vertex_lookup[tuple(active_cells[i].tolist())] = len(vertices) + vertices.append(vertex) + + if not vertices: + return torch.zeros((0, 3), device=device), torch.zeros((0, 3), dtype=torch.long, device=device) + + final_vertices = torch.stack(vertices) + + inside_corners_mask = active_signs + outside_corners_mask = ~active_signs + + inside_counts = inside_corners_mask.sum(dim=1, keepdim=True).float() + outside_counts = outside_corners_mask.sum(dim=1, keepdim=True).float() + + 
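# --- editor's note (not part of the patch): the exposure test in voxel_to_mesh
# above, reduced to a runnable sketch. A face is emitted only when the neighbour
# cell, looked up in the zero-padded grid, is empty; one isolated solid voxel
# therefore exposes 6 faces = 24 quad vertices = 12 triangles.
import torch

binary = torch.zeros(3, 3, 3)
binary[1, 1, 1] = 1.0
padded = torch.nn.functional.pad(binary, (1, 1, 1, 1, 1, 1), 'constant', 0)

solid = torch.nonzero(binary)                       # (1, 3): the single solid cell
neighbors = torch.tensor([[0, 0, 1], [0, 0, -1], [0, 1, 0],
                          [0, -1, 0], [1, 0, 0], [-1, 0, 0]])
exposed = 0
for offset in neighbors:
    idx = solid + offset + 1                        # +1 accounts for the padding
    exposed += int((padded[idx[:, 0], idx[:, 1], idx[:, 2]] == 0).sum())
print(exposed)                                      # 6
# --- end editor's note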
inside_pos = torch.zeros((active_cells.shape[0], 3), device=device) + outside_pos = torch.zeros((active_cells.shape[0], 3), device=device) + + for i in range(8): + mask_inside = inside_corners_mask[:, i].unsqueeze(1) + mask_outside = outside_corners_mask[:, i].unsqueeze(1) + inside_pos += corner_offsets[i].float().unsqueeze(0) * mask_inside + outside_pos += corner_offsets[i].float().unsqueeze(0) * mask_outside + + inside_pos /= inside_counts + outside_pos /= outside_counts + gradients = inside_pos - outside_pos + + pos_dirs = torch.tensor([ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1] + ], device=device) + + cross_products = [ + torch.linalg.cross(pos_dirs[i].float(), pos_dirs[j].float()) + for i in range(3) for j in range(i+1, 3) + ] + + faces = [] + all_keys = set(vertex_lookup.keys()) + + face_progress_mod = round(len(active_cells)/38*3) + + for pair_idx, (i, j) in enumerate([(0,1), (0,2), (1,2)]): + dir_i = pos_dirs[i] + dir_j = pos_dirs[j] + cross_product = cross_products[pair_idx] + + ni_positions = active_cells + dir_i + nj_positions = active_cells + dir_j + diag_positions = active_cells + dir_i + dir_j + + alignments = torch.matmul(gradients, cross_product) + + valid_quads = [] + quad_indices = [] + + for idx, active_cell in enumerate(active_cells): + if not idx % face_progress_mod: + progress.update(1) + cell_key = tuple(active_cell.tolist()) + ni_key = tuple(ni_positions[idx].tolist()) + nj_key = tuple(nj_positions[idx].tolist()) + diag_key = tuple(diag_positions[idx].tolist()) + + if cell_key in all_keys and ni_key in all_keys and nj_key in all_keys and diag_key in all_keys: + v0 = vertex_lookup[cell_key] + v1 = vertex_lookup[ni_key] + v2 = vertex_lookup[nj_key] + v3 = vertex_lookup[diag_key] + + valid_quads.append((v0, v1, v2, v3)) + quad_indices.append(idx) + + for q_idx, (v0, v1, v2, v3) in enumerate(valid_quads): + cell_idx = quad_indices[q_idx] + if alignments[cell_idx] > 0: + faces.append(torch.tensor([v0, v1, v3], device=device, dtype=torch.long)) + faces.append(torch.tensor([v0, v3, v2], device=device, dtype=torch.long)) + else: + faces.append(torch.tensor([v0, v3, v1], device=device, dtype=torch.long)) + faces.append(torch.tensor([v0, v2, v3], device=device, dtype=torch.long)) + + if faces: + faces = torch.stack(faces) + else: + faces = torch.zeros((0, 3), dtype=torch.long, device=device) + + v_min = 0 + v_max = max(D, H, W) + + final_vertices = final_vertices - (v_min + v_max) / 2 + + scale = (v_max - v_min) / 2 + if scale > 0: + final_vertices = final_vertices / scale + + final_vertices = torch.fliplr(final_vertices) + + return final_vertices, faces + + +def save_glb(vertices, faces, filepath, metadata=None): + """ + Save PyTorch tensor vertices and faces as a GLB file without external dependencies. 
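# --- editor's note (not part of the patch): the edge interpolation at the heart
# of voxel_to_mesh_surfnet above, isolated into a runnable sketch. The surface
# vertex sits at the linear zero-crossing of (value - threshold) along the edge;
# degenerate edges (equal corner values) fall back to the midpoint.
import torch

threshold = 0.5
v1 = torch.tensor([0.2, 0.5])        # corner values at one end of two edges
v2 = torch.tensor([0.9, 0.5])        # corner values at the other end

t = torch.zeros_like(v1)
denom = v2 - v1
valid = denom != 0
t[valid] = (threshold - v1[valid]) / denom[valid]
t[~valid] = 0.5
print(t)                             # tensor([0.4286, 0.5000])
# --- end editor's note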
+ + Parameters: + vertices: torch.Tensor of shape (N, 3) - The vertex coordinates + faces: torch.Tensor of shape (M, 3) - The face indices (triangle faces) + filepath: str - Output filepath (should end with .glb) + """ + + # Convert tensors to numpy arrays + vertices_np = vertices.cpu().numpy().astype(np.float32) + faces_np = faces.cpu().numpy().astype(np.uint32) + + vertices_buffer = vertices_np.tobytes() + indices_buffer = faces_np.tobytes() + + def pad_to_4_bytes(buffer): + padding_length = (4 - (len(buffer) % 4)) % 4 + return buffer + b'\x00' * padding_length + + vertices_buffer_padded = pad_to_4_bytes(vertices_buffer) + indices_buffer_padded = pad_to_4_bytes(indices_buffer) + + buffer_data = vertices_buffer_padded + indices_buffer_padded + + vertices_byte_length = len(vertices_buffer) + vertices_byte_offset = 0 + indices_byte_length = len(indices_buffer) + indices_byte_offset = len(vertices_buffer_padded) + + gltf = { + "asset": {"version": "2.0", "generator": "ComfyUI"}, + "buffers": [ + { + "byteLength": len(buffer_data) + } + ], + "bufferViews": [ + { + "buffer": 0, + "byteOffset": vertices_byte_offset, + "byteLength": vertices_byte_length, + "target": 34962 # ARRAY_BUFFER + }, + { + "buffer": 0, + "byteOffset": indices_byte_offset, + "byteLength": indices_byte_length, + "target": 34963 # ELEMENT_ARRAY_BUFFER + } + ], + "accessors": [ + { + "bufferView": 0, + "byteOffset": 0, + "componentType": 5126, # FLOAT + "count": len(vertices_np), + "type": "VEC3", + "max": vertices_np.max(axis=0).tolist(), + "min": vertices_np.min(axis=0).tolist() + }, + { + "bufferView": 1, + "byteOffset": 0, + "componentType": 5125, # UNSIGNED_INT + "count": faces_np.size, + "type": "SCALAR" + } + ], + "meshes": [ + { + "primitives": [ + { + "attributes": { + "POSITION": 0 + }, + "indices": 1, + "mode": 4 # TRIANGLES + } + ] + } + ], + "nodes": [ + { + "mesh": 0 + } + ], + "scenes": [ + { + "nodes": [0] + } + ], + "scene": 0 + } + + if metadata is not None: + gltf["asset"]["extras"] = metadata + + # Convert the JSON to bytes + gltf_json = json.dumps(gltf).encode('utf8') + + def pad_json_to_4_bytes(buffer): + padding_length = (4 - (len(buffer) % 4)) % 4 + return buffer + b' ' * padding_length + + gltf_json_padded = pad_json_to_4_bytes(gltf_json) + + # Create the GLB header + # Magic glTF + glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data)) + + # Create JSON chunk header (chunk type 0) + json_chunk_header = struct.pack('
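# --- editor's note (not part of the patch): save_glb above is assembling a
# standard GLB container (glTF 2.0 spec): a 12-byte header, a 4-byte-aligned
# JSON chunk tagged 'JSON' (0x4E4F534A), then a binary chunk tagged 'BIN\0'
# (0x004E4942). A self-contained sketch of the same framing with toy payloads:
import struct

json_payload = b'{"asset":{"version":"2.0"}}'
json_payload += b' ' * ((4 - len(json_payload) % 4) % 4)   # GLB pads JSON with spaces
bin_payload = b'\x00' * 8                                  # stand-in vertex/index data

total = 12 + 8 + len(json_payload) + 8 + len(bin_payload)
header = struct.pack('<4sII', b'glTF', 2, total)
json_chunk = struct.pack('<II', len(json_payload), 0x4E4F534A) + json_payload
bin_chunk = struct.pack('<II', len(bin_payload), 0x004E4942) + bin_payload
glb = header + json_chunk + bin_chunk
assert len(glb) == total
# --- end editor's note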