From c7bb3e2bceaad7accd52c23d22b97a1b6808304b Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Thu, 28 Aug 2025 19:46:57 -0700
Subject: [PATCH] Support the 5B fun inpaint model. (#9614)

Use the WanFunInpaintToVideo node without the clip_vision_output.
---
 comfy_extras/nodes_wan.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py
index 8c1d36613..4f73369f5 100644
--- a/comfy_extras/nodes_wan.py
+++ b/comfy_extras/nodes_wan.py
@@ -206,7 +206,8 @@ class WanFirstLastFrameToVideo(io.ComfyNode):
 
     @classmethod
     def execute(cls, positive, negative, vae, width, height, length, batch_size, start_image=None, end_image=None, clip_vision_start_image=None, clip_vision_end_image=None) -> io.NodeOutput:
-        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        spacial_scale = vae.spacial_compression_encode()
+        latent = torch.zeros([batch_size, vae.latent_channels, ((length - 1) // 4) + 1, height // spacial_scale, width // spacial_scale], device=comfy.model_management.intermediate_device())
         if start_image is not None:
             start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
         if end_image is not None: